Example #1
0
def parse_list_html(raw):
    pattern = re.compile(r"(?s)<li>(.*?)<\/li>")
    pattern_title = re.compile(r".*\d+年\d+月\d+日.*肺炎疫情.*")

    p = ArticleParser()
    for m in pattern.finditer(raw):
        p.feed(m.groups()[0])
    p.close()

    latest_url = ""
    for res in p.get_articles():
        if pattern_title.match(res["title"]) is not None:
            latest_url = root_url + res["href"][2:]
            break
    return latest_url
Example #2
0
def parse_list_html(raw):
    pattern = re.compile(r"(?s)<li class=\"list-group-item\">(.*?)<\/li>")
    pattern_title = re.compile(r".*河南.*肺炎疫情.*情况")

    p = ArticleParser()
    for m in pattern.finditer(raw):
        p.feed(m.groups()[0])
    p.close()

    latest_url = ""
    for res in p.get_articles():
        if pattern_title.match(res["title"]) is not None:
            latest_url = base_url + res["href"]
            break
    return latest_url
Example #3
0
def parse_list_html(raw):
    pattern = re.compile(r"<li>(.*?)<\/li>")
    pattern_title = re.compile(r"最新疫情通报...")

    p = ArticleParser()
    for m in pattern.finditer(raw):
        p.feed(m.groups()[0])
    p.close()

    latest_url = ""
    for res in p.get_articles():
        if pattern_title.match(res["title"]) is not None:
            latest_url = base_url + res["href"]
            break
    return latest_url
Example #4
0
def parse_list_html(raw):
    pattern = re.compile(r"(<td.*?<\/td>)")
    pattern_title = re.compile(r".*肺炎疫情情况")

    p = ArticleParser()
    for m in pattern.finditer(raw):
        p.feed(m.groups()[0])
    p.close()

    latest_url = ""
    for res in p.get_articles():
        if pattern_title.match(res["title"]) is not None:
            latest_url = base_url + res["href"][2:]
            break
    return latest_url
Example #5
0
def parse_list_html(raw):
    pattern = re.compile(
        r"(?s)<tr id=\"line17117_1\" height=\"20\">(.*?)<\/tr>")
    pattern_title = re.compile(r".*肺炎疫情.*")

    p = ArticleParser()
    for m in pattern.finditer(raw):
        p.feed(m.groups()[0])
    p.close()

    latest_url = ""
    for res in p.get_articles():
        if pattern_title.match(res["title"]) is not None:
            latest_url = base_url + res["href"][5:]
            break
    return latest_url