Python ArticleParser Examples

Programming Language: Python

Namespace/Package Name: utils

Class/Type: ArticleParser

Examples at hotexamples.com: 5

Python ArticleParser - 5 examples found. These are the top-rated real-world Python examples of utils.ArticleParser, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

ArticleParser(5)

close(5)

feed(5)

get_articles(5)

Example #1

Show file

def parse_list_html(raw):
    """Return the URL of the first article whose title is a dated bulletin.

    Every ``<li>...</li>`` fragment found in *raw* is fed to an
    ``ArticleParser``; the parsed articles are then scanned in order and the
    first one whose title matches the title pattern wins.

    Args:
        raw: HTML text of the listing page.

    Returns:
        ``root_url`` joined with the matching article's ``href`` minus its
        first two characters, or ``""`` when no title matches.
    """
    # (?s) lets "." span newlines, so multi-line list items are captured.
    item_re = re.compile(r"(?s)<li>(.*?)<\/li>")
    # Title must contain a "<digits>年<digits>月<digits>日" date followed
    # later by "肺炎疫情" (pneumonia epidemic).
    title_re = re.compile(r".*\d+年\d+月\d+日.*肺炎疫情.*")

    parser = ArticleParser()
    for item in item_re.finditer(raw):
        parser.feed(item.group(1))
    parser.close()

    # NOTE(review): the [2:] slice presumably drops a leading "./" from a
    # relative href -- confirm against the scraped page.
    return next(
        (
            root_url + article["href"][2:]
            for article in parser.get_articles()
            if title_re.match(article["title"])
        ),
        "",
    )

Example #2

Show file

def parse_list_html(raw):
    """Find the link of the first Henan epidemic-situation article in *raw*.

    The inner HTML of each ``<li class="list-group-item">`` element is fed to
    an ``ArticleParser``. The articles it reports are checked in order
    against the title pattern.

    Args:
        raw: HTML text of the listing page.

    Returns:
        ``base_url`` concatenated with the ``href`` of the first article
        whose title matches, or ``""`` if none does.
    """
    item_re = re.compile(r"(?s)<li class=\"list-group-item\">(.*?)<\/li>")
    # Matches titles containing "河南" (Henan), then "肺炎疫情" (pneumonia
    # epidemic), then "情况" (situation), in that order.
    title_re = re.compile(r".*河南.*肺炎疫情.*情况")

    parser = ArticleParser()
    # With a single capture group, findall yields the captured inner HTML.
    for inner_html in item_re.findall(raw):
        parser.feed(inner_html)
    parser.close()

    for article in parser.get_articles():
        if title_re.match(article["title"]):
            return base_url + article["href"]
    return ""

Example #3

Show file

def parse_list_html(raw):
    """Return the URL of the first "latest epidemic bulletin" article.

    Single-line ``<li>...</li>`` fragments of *raw* are passed through an
    ``ArticleParser`` and the resulting articles are searched in order.

    Args:
        raw: HTML text of the listing page.

    Returns:
        ``base_url`` plus the ``href`` of the first article whose title
        starts with the expected prefix, or ``""`` when there is none.
    """
    # No DOTALL flag here: an item only matches if it sits on one line.
    item_re = re.compile(r"<li>(.*?)<\/li>")
    # Title must start with "最新疫情通报" (latest epidemic bulletin)
    # followed by at least three more non-newline characters.
    title_re = re.compile(r"最新疫情通报...")

    parser = ArticleParser()
    for item in item_re.finditer(raw):
        parser.feed(item.group(1))
    parser.close()

    candidates = (
        article
        for article in parser.get_articles()
        if title_re.match(article["title"])
    )
    first_hit = next(candidates, None)
    if first_hit is None:
        return ""
    return base_url + first_hit["href"]

Example #4

Show file

def parse_list_html(raw):
    """Locate the first epidemic-situation article among table cells in *raw*.

    Each single-line ``<td ...>...</td>`` element, tags included, is fed to
    an ``ArticleParser``. The articles it collects are then examined in
    order.

    Args:
        raw: HTML text of the listing page.

    Returns:
        ``base_url`` followed by the matching article's ``href`` with its
        first two characters removed, or ``""`` if no title matches.
    """
    # The capture group wraps the whole cell, so the <td> tags are kept.
    cell_re = re.compile(r"(<td.*?<\/td>)")
    # Title must contain "肺炎疫情情况" (pneumonia epidemic situation).
    title_re = re.compile(r".*肺炎疫情情况")

    parser = ArticleParser()
    for cell_html in cell_re.findall(raw):
        parser.feed(cell_html)
    parser.close()

    for article in parser.get_articles():
        if title_re.match(article["title"]):
            # NOTE(review): [2:] presumably strips a leading "./" from the
            # relative link -- confirm against the scraped page.
            found_url = base_url + article["href"][2:]
            break
    else:
        found_url = ""
    return found_url

Example #5

Show file

def parse_list_html(raw):
    """Return the URL of the first pneumonia-epidemic article in *raw*.

    Only table rows written exactly as
    ``<tr id="line17117_1" height="20">`` are considered. Their inner HTML
    is fed to an ``ArticleParser`` and the parsed articles are scanned in
    order.

    Args:
        raw: HTML text of the listing page.

    Returns:
        ``base_url`` plus the matching article's ``href`` with its first
        five characters removed, or ``""`` when no title matches.
    """
    row_re = re.compile(
        r"(?s)<tr id=\"line17117_1\" height=\"20\">(.*?)<\/tr>")
    # Title must contain "肺炎疫情" (pneumonia epidemic).
    title_re = re.compile(r".*肺炎疫情.*")

    parser = ArticleParser()
    for row in row_re.finditer(raw):
        parser.feed(row.group(1))
    parser.close()

    for article in parser.get_articles():
        if title_re.match(article["title"]) is None:
            continue
        # NOTE(review): [5:] presumably strips a five-character relative
        # prefix from the link -- confirm against the scraped page.
        return base_url + article["href"][5:]
    return ""