Example #1
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the text between the ``<!--content begin -->`` and
    ``<!--content end -->`` markers; all figures are pulled from it with
    regular expressions.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` / ``Dead`` attributes are assigned only when
        the corresponding phrase is found. ``city`` maps a city id (looked up
        in the module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the content markers are not found in ``raw``
            (the search yields ``None``).
    """
    body = re.search(r"(?s)<!--content begin -->(.*)<!--content end -->", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计确诊.*?(\d+)例"),
        ("Healed", r"出院(\d+)例"),
        ("Dead", r"死亡(\d+)例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    # Only the text from the last "累计确诊" onward is scanned for cities.
    # NOTE: if the phrase is absent, rfind() gives -1 and just the final
    # character of the body is scanned.
    breakdown = body[body.rfind("累计确诊"):]
    for hit in re.finditer(r"[。,、]([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        label, count = hit.groups()
        # presumably strips a leading preposition from the label -- confirm in utils
        name = utils.remove_preposition(label)
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            continue  # the first figure seen for a city is kept
        record = CityData(name, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Example #2
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the content of the first ``<div class=TRS_Editor>``
    element (non-greedy up to the next ``</div>``). Numbers in this page are
    wrapped in markup, so every pattern captures digits sitting between
    ``>`` and ``<``.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` attributes are assigned only when the
        corresponding phrase is found. ``city`` maps a city id (from the
        module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the editor ``<div>`` is not found in ``raw``.
    """
    body = re.search(r"(?s)<div class=TRS_Editor>(.*?)<\/div>", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计.*?确诊病例.*?>(\d+)<.*?例"),
        ("Healed", r"治愈出院.*?>(\d+)<.*?例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    # Only the text from the last "确诊病例中" onward is scanned for cities.
    # NOTE: if the phrase is absent, rfind() gives -1 and just the final
    # character of the body is scanned.
    breakdown = body[body.rfind("确诊病例中"):]
    for hit in re.finditer(r"[,、]([\u4E00-\u9FA5]+).*?>(\d+)<.*?例", breakdown):
        label, count = hit.groups()
        # presumably strips a leading preposition from the label -- confirm in utils
        name = utils.remove_preposition(label)
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            continue  # the first figure seen for a city is kept
        record = CityData(name, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Example #3
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is everything between the ``<!-- 新闻内容 -->`` comment
    and the last ``</div>`` in the page (the capture is greedy).

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` / ``Dead`` attributes are assigned only when
        the corresponding phrase is found. ``city`` maps a city id (from the
        module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the content marker is not found in ``raw``.
    """
    body = re.search(r"(?s)<!-- 新闻内容 -->(.*)<\/div>", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计.*?确诊病例(\d+)例"),
        ("Healed", r"累计出院病例(\d+)例"),
        ("Dead", r"累计死亡病例(\d+)例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    # Only the text from the last "确诊病例中" onward is scanned for cities.
    # NOTE: if the phrase is absent, rfind() gives -1 and just the final
    # character of the body is scanned.
    breakdown = body[body.rfind("确诊病例中"):]
    for hit in re.finditer(r"[,、:]([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        label, count = hit.groups()
        # Translate an alternative spelling to the name used as key in `cities`.
        name = alia_cities[label] if label in alia_cities else label
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            continue  # the first figure seen for a city is kept
        record = CityData(name, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Example #4
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the content of the first ``<div class="danye">``
    element (non-greedy up to the next ``</div>``). Each province-level
    figure is the largest number found for its phrase anywhere in the body.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` / ``Dead`` attributes are assigned only when
        the corresponding phrase occurs at least once. ``city`` maps a city id
        (from the module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the ``danye`` ``<div>`` is not found in ``raw``.
    """
    body = re.search(r"(?s)<div class=\"danye\">(.*?)<\/div>", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # The same phrase can appear several times (e.g. new vs. cumulative);
    # the maximum of all matches is taken as the total.
    for attr, regex in (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"出院病例(\d+)例"),
        ("Dead", r"死亡病例(\d+)例"),
    ):
        figures = re.findall(regex, body)
        if figures:
            setattr(province, attr, max(int(figure) for figure in figures))

    city = {}
    # The whole body is scanned for "<city><n>例" fragments.
    for hit in re.finditer(r"[、:]([\u4E00-\u9FA5]+)(\d+)例", body):
        name, count = hit.groups()
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            continue  # the first figure seen for a city is kept
        record = CityData(name, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Example #5
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the text between the ``<!--ZJEG_RSS.content.begin-->``
    and ``<!--ZJEG_RSS.content.end-->`` markers. The pattern has no DOTALL
    flag, so both markers must sit on the same line of ``raw``.

    Labels listed in the module-level ``append_city`` mapping are folded into
    the total of the city they map to. If that city has no entry yet (the
    label appears before, or without, its target city), an entry is created
    instead of raising ``KeyError``.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` / ``Dead`` attributes are assigned only when
        the corresponding phrase is found. ``city`` maps a city id (from the
        module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the content markers are not found in ``raw``.
        KeyError: If ``append_city`` maps a label to a name missing from
            ``cities``.
    """
    pattern = re.compile(r"<!--ZJEG_RSS.content.begin-->(.*)<!--ZJEG_RSS.content.end-->")
    content = pattern.search(raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计报告.*?确诊病例(\d+)例"),
        ("Healed", r"累计报告.*?(\d+)例治愈出院"),
        ("Dead", r"累计报告.*?(\d+)例死亡"),
    ):
        found = re.search(regex, content)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    pattern_data = re.compile(r"[,、]([\u4E00-\u9FA5]+)(\d+)例")
    # Only the text from the last "确诊病例中" onward is scanned for cities.
    for hit in pattern_data.finditer(content[content.rfind("确诊病例中"):]):
        name, count = hit.groups()
        if name in cities:
            city_id = cities[name]
            if city_id not in city:  # the first figure seen for a city is kept
                record = CityData(name, city_id)
                record.Confirmed = int(count)
                city[city_id] = record
        if name in append_city:
            # This label's count belongs to another city's total.
            target_name = append_city[name]
            target_id = cities[target_name]
            if target_id in city:
                city[target_id].Confirmed += int(count)
            else:
                # Target city not seen yet: start its entry with this count
                # rather than failing on the missing key.
                record = CityData(target_name, target_id)
                record.Confirmed = int(count)
                city[target_id] = record
    return province, city
Example #6
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the content of the
    ``<div class="view TRS_UEDITOR trs_paper_default trs_word trs_key4format">``
    element. The pattern has no DOTALL flag, so the opening tag and the
    closing ``</div>`` must sit on the same line of ``raw``.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` / ``Dead`` attributes are assigned only when
        the corresponding phrase is found. ``city`` maps a city id (from the
        module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the content ``<div>`` is not found in ``raw``.
    """
    body = re.search(
        r"<div class=\"view TRS_UEDITOR trs_paper_default trs_word trs_key4format\">(.*)<\/div>",
        raw,
    ).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计.*?病例(\d+)例"),
        ("Healed", r"累计.*?出院(\d+)例"),
        ("Dead", r"累计.*?死亡(\d+)例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    # The whole body is scanned. Unlike a first-match-wins scheme, a later
    # match for the same city replaces the earlier entry here.
    for hit in re.finditer(r"[,;]([\u4E00-\u9FA5]+)(\d+)例", body):
        label, count = hit.groups()
        # Translate an alternative spelling to the name used as key in `cities`.
        name = alia_cities[label] if label in alia_cities else label
        if name not in cities:
            continue
        city_id = cities[name]
        record = CityData(name, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Example #7
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is everything between the
    ``<div class="ze-art" style="width: 100%;">`` tag and the last ``</div>``
    in the page (the capture is greedy).

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` attributes are assigned only when the
        corresponding phrase is found. ``city`` maps a city id (from the
        module-level ``cities`` mapping) to a ``CityData``.

    Raises:
        AttributeError: If the content ``<div>`` is not found in ``raw``.
    """
    body = re.search(
        r"(?s)<div class=\"ze-art\" style=\"width: 100%;\">(.*)<\/div>", raw
    ).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计.*?确诊.*?(\d+)例"),
        ("Healed", r"累计治愈出院(\d+)例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    # The per-city list is the span between the last "累计报告" and the last
    # "累计治愈". NOTE: a missing phrase makes rfind() return -1, which
    # shifts the corresponding slice bound to the end of the body.
    breakdown = body[body.rfind("累计报告"):body.rfind("累计治愈")]
    for hit in re.finditer(r"([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        label, count = hit.groups()
        # presumably strips a leading preposition from the label -- confirm in utils
        name = utils.remove_preposition(label)
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            continue  # the first figure seen for a city is kept
        record = CityData(name, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Example #8
0
def parse_content_html(raw):
    """Extract province totals from a bulletin page and attach one fixed city entry.

    The article body is the content of the
    ``<div class="view TRS_UEDITOR trs_paper_default trs_web">`` element.
    The pattern has no DOTALL flag, so the opening tag and the closing
    ``</div>`` must sit on the same line of ``raw``.

    No per-city figures are parsed from the page: the result always carries
    a single entry for "拉萨市" with ``Confirmed`` set to 1, provided that
    name exists in the module-level ``cities`` mapping.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` attributes are assigned only when the
        corresponding phrase is found. ``city`` maps a city id to a
        ``CityData``.

    Raises:
        AttributeError: If the content ``<div>`` is not found in ``raw``.
    """
    body = re.search(
        r"<div class=\"view TRS_UEDITOR trs_paper_default trs_web\">(.*)<\/div>",
        raw,
    ).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"累计确诊.*?病例(\d+)例"),
        ("Healed", r"出院病例(\d+)例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    # Hard-coded city record; the count is a constant, not read from the page.
    fixed_city = "拉萨市"
    if fixed_city in cities:
        city_id = cities[fixed_city]
        if city_id not in city:
            record = CityData(fixed_city, city_id)
            record.Confirmed = 1
            city[city_id] = record
    return province, city
Example #9
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the text between the "mian开始" HTML comment and the
    "责任编辑相关" HTML comment.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` is taken from the first matching phrase and whose
        ``Healed`` is taken from the last one; either is left unassigned when
        its phrase is absent. ``city`` maps a city id (from the module-level
        ``cities`` mapping) to a ``CityData``; repeated matches for the same
        city are summed.

    Raises:
        AttributeError: If the content markers are not found in ``raw``.
    """
    body = re.search(
        r"(?s)<!------------------------- mian开始 ------------------------->(.*)<!--------责任编辑相关---------->",
        raw,
    ).group(1)

    province = ProvinceData(provinceName, provinceKey)
    confirmed = re.search(r"累计.*?确诊.*?(\d+)例", body)
    if confirmed is not None:
        province.Confirmed = int(confirmed.group(1))
    # Every occurrence overwrites the previous one, so the last match wins.
    for healed in re.finditer(r",出院病例(\d+)例", body):
        province.Healed = int(healed.group(1))

    city = {}
    # The per-city list is the span between the last "累计报告" and the last
    # "重症病例". NOTE: a missing phrase makes rfind() return -1, which
    # shifts the corresponding slice bound to the end of the body.
    breakdown = body[body.rfind("累计报告"):body.rfind("重症病例")]
    for hit in re.finditer(r"[\.\u200b]([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        label, count = hit.groups()
        # presumably strips a leading preposition from the label -- confirm in utils
        name = utils.remove_preposition(label)
        # When the label continues past a "盟" (league) or "市" (city) suffix,
        # cut it right after the first such suffix. The checks run in order,
        # the second one seeing the result of the first.
        for suffix in ('盟', '市'):
            if suffix in name[:-1]:
                name = name[:name.find(suffix) + 1]
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            city[city_id].Confirmed += int(count)
        else:
            record = CityData(name, city_id)
            record.Confirmed = int(count)
            city[city_id] = record
    return province, city
Example #10
0
 def test_provincedata_serialize(self):
     # A ProvinceData with all three counters set and no city entries added
     # must serialize to a flat dict with lower-case keys and an empty
     # "cities" list.
     province = ProvinceData("广东", "guangdong")
     province.Dead = 1
     province.Confirmed = 2
     province.Healed = 3
     actual = utils.serialize(province)
     expected = {
         "province": "广东",
         "id": "guangdong",
         "confirmed": 2,
         "dead": 1,
         "healed": 3,
         "cities": [],
     }
     self.assertDictEqual(actual, expected)
Example #11
0
def parse_content_html(raw):
    """Extract province totals and per-city confirmed counts from a bulletin page.

    The article body is the first ``<p>`` element whose text contains "累计".
    The pattern has no DOTALL flag, so the paragraph must sit on a single
    line of ``raw``.

    Special handling: the place name captured right after the death count is
    also credited with that many confirmed cases in the per-city result.

    Args:
        raw: HTML text of the page.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed`` / ``Healed`` / ``Dead`` attributes are assigned only when
        the corresponding phrase is found. ``city`` maps a city id (from the
        module-level ``cities`` mapping) to a ``CityData``; repeated matches
        for the same city are summed.

    Raises:
        AttributeError: If no matching ``<p>`` is found in ``raw``.
    """
    body = re.search(r"<p>(.*?累计.*?)<\/p>", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Province-level totals: the first occurrence of each phrase is used.
    for attr, regex in (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"治愈出院(\d+)例"),
    ):
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    # The death phrase also carries a place name (second capture group).
    death = re.search(r"死亡(\d+)例(([\u4E00-\u9FA5]+))", body)
    deaths_by_place = {}
    if death is not None:
        province.Dead = int(death.group(1))
        deaths_by_place[death.group(2)] = int(death.group(1))  # SPECIAL HANDLE

    city = {}

    def _credit(place, count):
        # Add `count` to the city's confirmed total, creating its entry on
        # first sight; names missing from `cities` are ignored.
        if place not in cities:
            return
        city_id = cities[place]
        if city_id in city:
            city[city_id].Confirmed += int(count)
        else:
            record = CityData(place, city_id)
            record.Confirmed = int(count)
            city[city_id] = record

    for hit in re.finditer(r"[(,、。]([\u4E00-\u9FA5]+)(\d+)例", body):
        label, count = hit.groups()
        # presumably strips a leading preposition from the label -- confirm in utils
        name = utils.remove_preposition(label)
        if name in alia_cities:
            # Translate an alternative spelling to the key used in `cities`.
            name = alia_cities[name]
        _credit(name, count)

    # SPECIAL HANDLE
    for place, count in deaths_by_place.items():
        _credit(place, count)
    return province, city