def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is everything between the ``<!-- 新闻内容 -->`` marker
    and the last ``</div>`` in ``raw``.  Province totals are taken from the
    first match of each "cumulative ..." phrase in that body.  City figures
    are scanned only from the last occurrence of ``确诊病例中`` onwards.

    Args:
        raw: HTML text of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the populated ``ProvinceData`` and a
        dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the body marker pattern does not match ``raw``.
    """
    body = re.search(r"(?s)<!-- 新闻内容 -->(.*)<\/div>", raw).group(1)
    province = ProvinceData(provinceName, provinceKey)

    # Attribute on the province object -> phrase whose number fills it.
    totals = (
        ("Confirmed", r"累计.*?确诊病例(\d+)例"),
        ("Healed", r"累计出院病例(\d+)例"),
        ("Dead", r"累计死亡病例(\d+)例"),
    )
    for attr, regex in totals:
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    by_id = {}
    # rfind() yields -1 when the marker is absent, which leaves only the
    # final character to scan, so no city can match in that case.
    tail = body[body.rfind("确诊病例中"):]
    for hit in re.finditer(r"[,、:]([\u4E00-\u9FA5]+)(\d+)例", tail):
        label, count = hit.groups()
        if label in alia_cities:
            label = alia_cities[label]
        if label not in cities:
            continue
        city_id = cities[label]
        if city_id in by_id:
            # Only the first figure seen for a city is kept.
            continue
        entry = CityData(label, city_id)
        entry.Confirmed = int(count)
        by_id[city_id] = entry
    return province, by_id
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the content of the first ``<div class="danye">``
    element up to the nearest ``</div>``.  Each province total is the
    largest number found among all matches of its phrase in the body.

    Args:
        raw: HTML text of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the populated ``ProvinceData`` and a
        dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the ``danye`` container is not found in ``raw``.
    """
    body = re.search(r"(?s)<div class=\"danye\">(.*?)<\/div>", raw).group(1)
    province = ProvinceData(provinceName, provinceKey)

    # Attribute on the province object -> phrase whose numbers feed it.
    totals = (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"出院病例(\d+)例"),
        ("Dead", r"死亡病例(\d+)例"),
    )
    for attr, regex in totals:
        numbers = re.findall(regex, body)
        if numbers:
            # Several figures may be present; the largest one is used.
            setattr(province, attr, max(int(n) for n in numbers))

    by_id = {}
    for hit in re.finditer(r"[、:]([\u4E00-\u9FA5]+)(\d+)例", body):
        label, count = hit.groups()
        if label not in cities:
            continue
        city_id = cities[label]
        if city_id in by_id:
            # Only the first figure seen for a city is kept.
            continue
        entry = CityData(label, city_id)
        entry.Confirmed = int(count)
        by_id[city_id] = entry
    return province, by_id
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the text between ``<!--content begin -->`` and the
    last ``<!--content end -->`` in ``raw``.  Province totals come from the
    first match of each phrase.  City figures are scanned only from the
    last occurrence of ``累计确诊`` onwards.

    Args:
        raw: HTML text of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the populated ``ProvinceData`` and a
        dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the content markers are not found in ``raw``.
    """
    body = re.search(
        r"(?s)<!--content begin -->(.*)<!--content end -->", raw
    ).group(1)
    province = ProvinceData(provinceName, provinceKey)

    # Attribute on the province object -> phrase whose number fills it.
    totals = (
        ("Confirmed", r"累计确诊.*?(\d+)例"),
        ("Healed", r"出院(\d+)例"),
        ("Dead", r"死亡(\d+)例"),
    )
    for attr, regex in totals:
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    by_id = {}
    # rfind() yields -1 when the marker is absent, which leaves only the
    # final character to scan, so no city can match in that case.
    tail = body[body.rfind("累计确诊"):]
    for hit in re.finditer(r"[。,、]([\u4E00-\u9FA5]+)(\d+)例", tail):
        # The captured name is normalised by the project helper before lookup.
        label = utils.remove_preposition(hit.group(1))
        if label not in cities:
            continue
        city_id = cities[label]
        if city_id in by_id:
            # Only the first figure seen for a city is kept.
            continue
        entry = CityData(label, city_id)
        entry.Confirmed = int(hit.group(2))
        by_id[city_id] = entry
    return province, by_id
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the text between the ``ZJEG_RSS.content.begin`` and
    ``ZJEG_RSS.content.end`` comment markers (the pattern has no DOTALL
    flag, so both markers must be on one line).  Province totals come from
    the first match of each "cumulative report" phrase.  City figures are
    scanned only from the last occurrence of ``确诊病例中`` onwards.

    Names present in ``append_city`` are not cities themselves: their count
    is added to the city that ``append_city`` maps them to.  These counts
    are accumulated during the scan and merged afterwards, so the result
    does not depend on whether the source is listed before or after its
    target city, and a target that is never listed on its own still gets
    an entry instead of raising ``KeyError``.

    Args:
        raw: HTML text of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the populated ``ProvinceData`` and a
        dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the content markers are not found in ``raw``.
        KeyError: if an ``append_city`` target is missing from ``cities``.
    """
    content = re.search(
        r"<!--ZJEG_RSS.content.begin-->(.*)<!--ZJEG_RSS.content.end-->", raw
    ).group(1)
    province = ProvinceData(provinceName, provinceKey)

    # Attribute on the province object -> phrase whose number fills it.
    totals = (
        ("Confirmed", r"累计报告.*?确诊病例(\d+)例"),
        ("Healed", r"累计报告.*?(\d+)例治愈出院"),
        ("Dead", r"累计报告.*?(\d+)例死亡"),
    )
    for attr, regex in totals:
        found = re.search(regex, content)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    city = {}
    pending = {}        # target city id -> count still to be added
    pending_names = {}  # target city id -> target city name
    # rfind() yields -1 when the marker is absent, which leaves only the
    # final character to scan, so no city can match in that case.
    tail = content[content.rfind("确诊病例中"):]
    for hit in re.finditer(r"[,、]([\u4E00-\u9FA5]+)(\d+)例", tail):
        name = hit.group(1)
        count = int(hit.group(2))
        if name in cities:
            city_id = cities[name]
            if city_id not in city:
                # Only the first figure seen for a city is kept.
                entry = CityData(name, city_id)
                entry.Confirmed = count
                city[city_id] = entry
        if name in append_city:
            target = append_city[name]
            target_id = cities[target]
            pending[target_id] = pending.get(target_id, 0) + count
            pending_names[target_id] = target

    # Merge appended counts once every directly listed city is known.
    for target_id, extra in pending.items():
        if target_id in city:
            city[target_id].Confirmed += extra
        else:
            entry = CityData(pending_names[target_id], target_id)
            entry.Confirmed = extra
            city[target_id] = entry
    return province, city
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the content of the ``view TRS_UEDITOR ...`` div up
    to the last ``</div>`` on the same line (the pattern has no DOTALL
    flag).  Province totals come from the first match of each "cumulative"
    phrase.  Unlike a first-match-wins scan, a later figure for the same
    city replaces an earlier one here.

    Args:
        raw: HTML text of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the populated ``ProvinceData`` and a
        dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the content div is not found in ``raw``.
    """
    body = re.search(
        r"<div class=\"view TRS_UEDITOR trs_paper_default trs_word trs_key4format\">(.*)<\/div>",
        raw,
    ).group(1)
    province = ProvinceData(provinceName, provinceKey)

    # Attribute on the province object -> phrase whose number fills it.
    totals = (
        ("Confirmed", r"累计.*?病例(\d+)例"),
        ("Healed", r"累计.*?出院(\d+)例"),
        ("Dead", r"累计.*?死亡(\d+)例"),
    )
    for attr, regex in totals:
        found = re.search(regex, body)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    by_id = {}
    for hit in re.finditer(r"[,;]([\u4E00-\u9FA5]+)(\d+)例", body):
        label, count = hit.groups()
        if label in alia_cities:
            label = alia_cities[label]
        if label not in cities:
            continue
        city_id = cities[label]
        # No duplicate check: the most recent match for a city wins.
        entry = CityData(label, city_id)
        entry.Confirmed = int(count)
        by_id[city_id] = entry
    return province, by_id
def test_provincedata_serialize(self):
    """Check the dict that ``utils.serialize`` produces for a ProvinceData."""
    province = ProvinceData("广东", "guangdong")
    province.Dead = 1
    province.Confirmed = 2
    province.Healed = 3

    actual = utils.serialize(province)

    expected = {
        "province": "广东",
        "id": "guangdong",
        "confirmed": 2,
        "dead": 1,
        "healed": 3,
        "cities": [],
    }
    self.assertDictEqual(actual, expected)
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The text analysed is the first ``<p>`` element containing ``累计``.
    Province totals come from the first match of each phrase.  Every city
    figure in the paragraph is summed per city id.  The location captured
    by the death pattern additionally has the death count added to its
    confirmed figure (the original "SPECIAL HANDLE").

    Args:
        raw: HTML text of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the populated ``ProvinceData`` and a
        dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if no matching paragraph is found in ``raw``.
    """
    paragraph = re.search(r"<p>(.*?累计.*?)<\/p>", raw).group(1)
    province = ProvinceData(provinceName, provinceKey)

    # Attribute on the province object -> phrase whose number fills it.
    totals = (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"治愈出院(\d+)例"),
    )
    for attr, regex in totals:
        found = re.search(regex, paragraph)
        if found is not None:
            setattr(province, attr, int(found.group(1)))

    # SPECIAL HANDLE: remember where the deaths were reported so that the
    # count can be added to that city's confirmed figure further down.
    deaths_by_place = {}
    death = re.search(r"死亡(\d+)例(([\u4E00-\u9FA5]+))", paragraph)
    if death is not None:
        dead = int(death.group(1))
        province.Dead = dead
        deaths_by_place[death.group(2)] = dead

    by_id = {}

    def add(label, amount):
        # Create the city's entry on first sight, otherwise add to it.
        if label not in cities:
            return
        city_id = cities[label]
        if city_id in by_id:
            by_id[city_id].Confirmed += amount
        else:
            entry = CityData(label, city_id)
            entry.Confirmed = amount
            by_id[city_id] = entry

    for hit in re.finditer(r"[(,、。]([\u4E00-\u9FA5]+)(\d+)例", paragraph):
        # The captured name is normalised by the project helper before lookup.
        label = utils.remove_preposition(hit.group(1))
        if label in alia_cities:
            label = alia_cities[label]
        add(label, int(hit.group(2)))

    # SPECIAL HANDLE: fold the death-location count into the city figures.
    for place, dead in deaths_by_place.items():
        add(place, dead)

    return province, by_id