def parse_list_html(raw):
    """Return the URL of the first matching article in a list page.

    Every ``<li>...</li>`` element found in *raw* has its inner markup fed
    to an ``ArticleParser``.  The parsed articles are then scanned in the
    order ``get_articles()`` yields them, and the first one whose title
    matches the date + "肺炎疫情" pattern wins.

    Args:
        raw: HTML text of the list page.

    Returns:
        ``root_url`` joined with the article's ``href`` minus its first two
        characters, or ``""`` when no title matches.
    """
    item_re = re.compile(r"(?s)<li>(.*?)<\/li>")
    title_re = re.compile(r".*\d+年\d+月\d+日.*肺炎疫情.*")

    parser = ArticleParser()
    for item in item_re.finditer(raw):
        parser.feed(item.group(1))
    parser.close()

    for article in parser.get_articles():
        if title_re.match(article["title"]):
            # The first two characters of the href are dropped
            # (presumably a leading "./" -- TODO confirm against the site).
            return root_url + article["href"][2:]
    return ""
def parse_list_html(raw):
    """Locate the first article whose title matches the 河南 bulletin pattern.

    The inner markup of each ``<li class="list-group-item">`` element in
    *raw* is fed to an ``ArticleParser``; the resulting articles are then
    checked in the order ``get_articles()`` yields them.

    Args:
        raw: HTML text of the list page.

    Returns:
        ``base_url`` concatenated with the matching article's ``href``, or
        ``""`` when no article title matches.
    """
    entry_re = re.compile(r"(?s)<li class=\"list-group-item\">(.*?)<\/li>")
    wanted_title = re.compile(r".*河南.*肺炎疫情.*情况")

    extractor = ArticleParser()
    for hit in entry_re.finditer(raw):
        extractor.feed(hit.group(1))
    extractor.close()

    # Lazily build candidate URLs and take the first; "" is the fallback.
    candidates = (
        base_url + info["href"]
        for info in extractor.get_articles()
        if wanted_title.match(info["title"]) is not None
    )
    return next(candidates, "")
def parse_list_html(raw):
    """Find the first article whose title starts with "最新疫情通报".

    Each ``<li>...</li>`` element in *raw* contributes its inner markup to
    an ``ArticleParser``.  The item pattern has no DOTALL flag, so an item
    is only picked up when its opening and closing tags share a line.

    Args:
        raw: HTML text of the list page.

    Returns:
        ``base_url`` concatenated with the matching article's ``href``, or
        ``""`` when nothing matches.
    """
    li_re = re.compile(r"<li>(.*?)<\/li>")
    # The three trailing dots require at least three more characters
    # after the fixed prefix.
    headline_re = re.compile(r"最新疫情通报...")

    collector = ArticleParser()
    # With a single capture group, findall yields the group text directly.
    for inner_html in li_re.findall(raw):
        collector.feed(inner_html)
    collector.close()

    for item in collector.get_articles():
        if headline_re.match(item["title"]):
            return base_url + item["href"]
    return ""
def parse_list_html(raw):
    """Pick the first article whose title matches ".*肺炎疫情情况".

    Every ``<td ...>...</td>`` cell in *raw* (tags included) is fed to an
    ``ArticleParser``.  The cell pattern has no DOTALL flag, so a cell must
    sit on a single line to be captured.

    Args:
        raw: HTML text of the list page.

    Returns:
        ``base_url`` joined with the article's ``href`` minus its first two
        characters, or ``""`` when no title matches.
    """
    cell_re = re.compile(r"(<td.*?<\/td>)")
    title_re = re.compile(r".*肺炎疫情情况")

    html_parser = ArticleParser()
    for cell in cell_re.finditer(raw):
        html_parser.feed(cell.group(1))
    html_parser.close()

    for entry in html_parser.get_articles():
        if title_re.match(entry["title"]) is None:
            continue
        # The first two characters of the href are dropped
        # (presumably a "./" prefix -- TODO confirm against the site).
        found = base_url + entry["href"][2:]
        break
    else:
        found = ""
    return found
def parse_list_html(raw):
    """Return the URL of the first article whose title contains "肺炎疫情".

    Only table rows written exactly as
    ``<tr id="line17117_1" height="20">`` are considered; the inner markup
    of each such row is fed to an ``ArticleParser``.

    Args:
        raw: HTML text of the list page.

    Returns:
        ``base_url`` joined with the article's ``href`` minus its first
        five characters, or ``""`` when no title matches.
    """
    row_re = re.compile(
        r"(?s)<tr id=\"line17117_1\" height=\"20\">(.*?)<\/tr>")
    title_re = re.compile(r".*肺炎疫情.*")

    reader = ArticleParser()
    for row in row_re.finditer(raw):
        reader.feed(row.group(1))
    reader.close()

    for record in reader.get_articles():
        matched = title_re.match(record["title"])
        if matched is None:
            continue
        # The first five characters of the href are dropped (presumably
        # a relative-path prefix -- TODO confirm against the site).
        relative = record["href"][5:]
        return base_url + relative
    return ""