Esempio n. 1
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the text between the ``<!-- 新闻内容 -->`` marker and
    the last ``</div>`` (greedy, dot-matches-newline). There is no guard for a
    page without that container: the ``.groups()`` call then fails on ``None``.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` built
        from the module-level ``provinceName``/``provinceKey``; its
        ``Confirmed``/``Healed``/``Dead`` attributes are assigned only when
        the matching sentence is found. ``city`` maps ids taken from
        ``cities`` to ``CityData`` objects.
    """
    body = re.search(r"(?s)<!-- 新闻内容 -->(.*)<\/div>", raw).groups()[0]

    province = ProvinceData(provinceName, provinceKey)
    # Each province-level figure comes from the first sentence matching its regex.
    stat_patterns = (
        ("Confirmed", r"累计.*?确诊病例(\d+)例"),
        ("Healed", r"累计出院病例(\d+)例"),
        ("Dead", r"累计死亡病例(\d+)例"),
    )
    for attr, regex in stat_patterns:
        hit = re.search(regex, body)
        if hit is not None:
            setattr(province, attr, int(hit.group(1)))

    # Only the text from the last "确诊病例中" onwards is scanned for cities.
    # When the marker is absent rfind() yields -1, so just the final character
    # is scanned and no city can match.
    breakdown = body[body.rfind("确诊病例中"):]

    city = {}
    for found in re.finditer(r"[,、:]([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        label, count = found.groups()
        # Map alternative spellings onto the canonical city name.
        label = alia_cities.get(label, label)
        if label not in cities:
            continue
        city_id = cities[label]
        if city_id in city:
            # The first figure seen for a city wins; later mentions are ignored.
            continue
        record = CityData(label, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Esempio n. 2
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the content of the first ``<div class="danye">`` up to
    the nearest ``</div>`` (non-greedy, dot-matches-newline). A page without
    that container fails on the ``.groups()`` call, since the search result is
    ``None``.

    Returns:
        A ``(province, city)`` tuple. Each province figure is the largest
        number among all matches of its pattern, and is left unassigned when
        there is no match. ``city`` maps ids taken from ``cities`` to
        ``CityData`` objects.
    """
    body = re.search(r"(?s)<div class=\"danye\">(.*?)<\/div>", raw).groups()[0]

    province = ProvinceData(provinceName, provinceKey)
    # A pattern can match several sentences; the maximum value is kept.
    stat_patterns = (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"出院病例(\d+)例"),
        ("Dead", r"死亡病例(\d+)例"),
    )
    for attr, regex in stat_patterns:
        numbers = re.findall(regex, body)
        if numbers:
            setattr(province, attr, max(map(int, numbers)))

    city = {}
    for found in re.finditer(r"[、:]([\u4E00-\u9FA5]+)(\d+)例", body):
        label, count = found.groups()
        if label not in cities:
            continue
        city_id = cities[label]
        if city_id in city:
            # The first figure seen for a city wins; later mentions are ignored.
            continue
        record = CityData(label, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Esempio n. 3
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is everything between ``<!--content begin -->`` and the
    last ``<!--content end -->`` (greedy, dot-matches-newline). A page without
    those markers fails on the ``.groups()`` call, since the search result is
    ``None``.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed``/``Healed``/``Dead`` attributes are assigned only when
        the matching sentence is found. ``city`` maps ids taken from
        ``cities`` to ``CityData`` objects.
    """
    body = re.search(
        r"(?s)<!--content begin -->(.*)<!--content end -->", raw
    ).groups()[0]

    province = ProvinceData(provinceName, provinceKey)
    # Each province-level figure comes from the first sentence matching its regex.
    stat_patterns = (
        ("Confirmed", r"累计确诊.*?(\d+)例"),
        ("Healed", r"出院(\d+)例"),
        ("Dead", r"死亡(\d+)例"),
    )
    for attr, regex in stat_patterns:
        hit = re.search(regex, body)
        if hit is not None:
            setattr(province, attr, int(hit.group(1)))

    # Cities are read from the last "累计确诊" onwards. When the marker is
    # absent rfind() yields -1, so only the final character is scanned.
    breakdown = body[body.rfind("累计确诊"):]

    city = {}
    for found in re.finditer(r"[。,、]([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        raw_label, count = found.groups()
        # The captured run of characters is cleaned by the project helper
        # before it is looked up as a city name.
        label = utils.remove_preposition(raw_label)
        if label not in cities:
            continue
        city_id = cities[label]
        if city_id in city:
            # The first figure seen for a city wins; later mentions are ignored.
            continue
        record = CityData(label, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Esempio n. 4
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the text between the ``ZJEG_RSS`` content begin/end
    comment markers. The pattern has no dot-matches-newline flag, so both
    markers must sit on the same line. A page without them fails on the
    ``.groups()`` call, since the search result is ``None``.

    Names listed in ``append_city`` are districts whose figures are merged
    into another city. Their counts are summed separately and folded into the
    target city after the scan. This way a district mentioned *before* its
    target city no longer raises ``KeyError``, and the target city's own
    figure is still counted.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed``/``Healed``/``Dead`` attributes are assigned only when
        the matching sentence is found. ``city`` maps ids taken from
        ``cities`` to ``CityData`` objects.

    Raises:
        KeyError: if an ``append_city`` target name is missing from ``cities``.
    """
    pattern = re.compile(r"<!--ZJEG_RSS.content.begin-->(.*)<!--ZJEG_RSS.content.end-->")
    m = pattern.search(raw)
    content = m.groups()[0]

    province = ProvinceData(provinceName, provinceKey)
    # Each province-level figure comes from the first sentence matching its regex.
    stat_patterns = (
        ("Confirmed", r"累计报告.*?确诊病例(\d+)例"),
        ("Healed", r"累计报告.*?(\d+)例治愈出院"),
        ("Dead", r"累计报告.*?(\d+)例死亡"),
    )
    for attr, regex in stat_patterns:
        hit = re.search(regex, content)
        if hit is not None:
            setattr(province, attr, int(hit.group(1)))

    city = {}
    merged = {}  # target city id -> [target city name, summed merged count]
    pattern_data = re.compile(r"[,、]([\u4E00-\u9FA5]+)(\d+)例")
    # Cities are read from the last "确诊病例中" onwards. When the marker is
    # absent rfind() yields -1, so only the final character is scanned.
    for i in pattern_data.finditer(content[content.rfind("确诊病例中"):]):
        name = i.group(1)
        count = int(i.group(2))
        if name in cities:
            city_id = cities[name]
            # The first figure seen for a city wins; later mentions are ignored.
            if city_id not in city:
                d = CityData(name, city_id)
                d.Confirmed = count
                city[city_id] = d
        if name in append_city:
            target_name = append_city[name]
            target_id = cities[target_name]
            # Defer the merge so it does not depend on the order of mentions.
            if target_id in merged:
                merged[target_id][1] += count
            else:
                merged[target_id] = [target_name, count]

    for target_id, (target_name, extra) in merged.items():
        if target_id in city:
            city[target_id].Confirmed += extra
        else:
            # The target city was never listed by itself: its total is just
            # the merged districts.
            d = CityData(target_name, target_id)
            d.Confirmed = extra
            city[target_id] = d
    return province, city
Esempio n. 5
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the content of the ``TRS_UEDITOR`` view ``<div>`` up to
    the last ``</div>`` on the same line. The match is greedy and has no
    dot-matches-newline flag. A page without that container fails on the
    ``.groups()`` call, since the search result is ``None``.

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed``/``Healed``/``Dead`` attributes are assigned only when
        the matching sentence is found. ``city`` maps ids taken from
        ``cities`` to ``CityData`` objects. When a city is mentioned more than
        once, the last mention replaces earlier ones.
    """
    body = re.search(
        r"<div class=\"view TRS_UEDITOR trs_paper_default trs_word trs_key4format\">(.*)<\/div>",
        raw,
    ).groups()[0]

    province = ProvinceData(provinceName, provinceKey)
    # Each province-level figure comes from the first sentence matching its regex.
    stat_patterns = (
        ("Confirmed", r"累计.*?病例(\d+)例"),
        ("Healed", r"累计.*?出院(\d+)例"),
        ("Dead", r"累计.*?死亡(\d+)例"),
    )
    for attr, regex in stat_patterns:
        hit = re.search(regex, body)
        if hit is not None:
            setattr(province, attr, int(hit.group(1)))

    city = {}
    for found in re.finditer(r"[,;]([\u4E00-\u9FA5]+)(\d+)例", body):
        label, count = found.groups()
        # Map alternative spellings onto the canonical city name.
        label = alia_cities.get(label, label)
        if label not in cities:
            continue
        city_id = cities[label]
        # No duplicate check here: a later mention overwrites the earlier record.
        record = CityData(label, city_id)
        record.Confirmed = int(count)
        city[city_id] = record
    return province, city
Esempio n. 6
0
 def test_provincedata_serialize(self):
     """Serializing a ProvinceData yields the expected plain dictionary."""
     province = ProvinceData("广东", "guangdong")
     province.Dead = 1
     province.Confirmed = 2
     province.Healed = 3

     serialized = utils.serialize(province)

     expected = {
         "province": "广东",
         "id": "guangdong",
         "confirmed": 2,
         "dead": 1,
         "healed": 3,
         "cities": [],
     }
     self.assertDictEqual(serialized, expected)
Esempio n. 7
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the first ``<p>`` paragraph containing "累计", matched
    without the dot-matches-newline flag. A page without such a paragraph
    fails on the ``.groups()`` call, since the search result is ``None``.

    Unlike a first-mention-wins scan, repeated mentions of the same city are
    summed. The place name that directly follows the death figure is also fed
    into the per-city tally using the death count (the original "SPECIAL
    HANDLE").

    Returns:
        A ``(province, city)`` tuple. ``province`` is a ``ProvinceData`` whose
        ``Confirmed``/``Healed``/``Dead`` attributes are assigned only when
        the matching sentence is found. ``city`` maps ids taken from
        ``cities`` to ``CityData`` objects.
    """
    body = re.search(r"<p>(.*?累计.*?)<\/p>", raw).groups()[0]

    province = ProvinceData(provinceName, provinceKey)
    for attr, regex in (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"治愈出院(\d+)例"),
    ):
        hit = re.search(regex, body)
        if hit is not None:
            setattr(province, attr, int(hit.group(1)))

    # The death sentence also yields the place name that follows the figure.
    death_places = {}
    death_hit = re.search(r"死亡(\d+)例(([\u4E00-\u9FA5]+))", body)
    if death_hit is not None:
        deaths = int(death_hit.group(1))
        province.Dead = deaths
        death_places[death_hit.group(2)] = deaths  # SPECIAL HANDLE

    city = {}

    def _tally(label, city_id, amount):
        # The first sighting creates the record; later sightings accumulate.
        if city_id not in city:
            record = CityData(label, city_id)
            record.Confirmed = amount
            city[city_id] = record
        else:
            city[city_id].Confirmed += amount

    for found in re.finditer(r"[(,、。]([\u4E00-\u9FA5]+)(\d+)例", body):
        raw_label, count = found.groups()
        label = utils.remove_preposition(raw_label)
        # Map alternative spellings onto the canonical city name.
        label = alia_cities.get(label, label)
        if label in cities:
            _tally(label, cities[label], int(count))

    # SPECIAL HANDLE: the place named after the death figure joins the tally.
    for place, amount in death_places.items():
        if place in cities:
            _tally(place, cities[place], amount)
    return province, city