Ejemplo n.º 1
0
def test_extract_article_full_page_content_digest_node_indexes():
    """Run the article check with content digests and node indexes enabled.

    Hands an ``.html`` fixture name and a ``.json`` fixture name to
    ``check_extract_article`` with ``content_digests`` and ``node_indexes``
    both set to ``True``. The helper lives outside this block; presumably it
    compares the extraction result with the JSON fixture (confirm there).
    """
    html_fixture = "addictinginfo.com-1_full_page.html"
    json_fixture = "addictinginfo.com-1_simple_article_from_full_page_content_digest_node_indexes.json"
    check_extract_article(
        html_fixture,
        json_fixture,
        content_digests=True,
        node_indexes=True,
    )
Ejemplo n.º 2
0
def test_extract_article_list_items_node_indexes():
    """Run the article check on the list-items fixture with node indexes.

    Calls ``check_extract_article`` with an ``.html`` fixture name, a
    ``.json`` fixture name, and ``node_indexes=True``. What the helper does
    with these is defined outside this block.
    """
    html_fixture = "list_items_full_page.html"
    json_fixture = "list_items_simple_article_from_full_page_node_indexes.json"
    check_extract_article(
        html_fixture,
        json_fixture,
        node_indexes=True,
    )
Ejemplo n.º 3
0
def test_extract_article_full_page():
    """Run the article check on a full-page fixture with default options.

    Only the two fixture names are passed to ``check_extract_article``; no
    optional keyword arguments are supplied.
    """
    html_fixture = "addictinginfo.com-1_full_page.html"
    json_fixture = "addictinginfo.com-1_simple_article_from_full_page.json"
    check_extract_article(html_fixture, json_fixture)
Ejemplo n.º 4
0
def test_extract_article_headers_and_non_paragraph_blockquote_text():
    """Run the article check on the davidwolfe.com full-page fixture.

    Passes the two fixture names to ``check_extract_article`` with default
    options. NOTE(review): the test name suggests the fixture covers headers
    and blockquote text outside paragraphs; confirm against the fixture.
    """
    html_fixture = "davidwolfe.com-1_full_page.html"
    json_fixture = "davidwolfe.com-1_simple_article_from_full_page.json"
    check_extract_article(html_fixture, json_fixture)
Ejemplo n.º 5
0
def test_extract_article_list_items_content_digests():
    """Run the article check on the list-items fixture with content digests.

    Calls ``check_extract_article`` with the two fixture names and
    ``content_digests=True``.
    """
    html_fixture = "list_items_full_page.html"
    json_fixture = "list_items_simple_article_from_full_page_content_digests.json"
    check_extract_article(
        html_fixture,
        json_fixture,
        content_digests=True,
    )
Ejemplo n.º 6
0
def test_extract_article_list_items():
    """Run the article check on the list-items fixture with default options.

    Only the two fixture names are passed to ``check_extract_article``.
    """
    html_fixture = "list_items_full_page.html"
    json_fixture = "list_items_simple_article_from_full_page.json"
    check_extract_article(html_fixture, json_fixture)
Ejemplo n.º 7
0
def test_extract_article_unicode_normalisation():
    """Run the article check on the conservativehq.com full-page fixture.

    Passes the two fixture names to ``check_extract_article`` with default
    options. NOTE(review): the test name suggests this fixture exercises
    Unicode normalisation; confirm against the fixture contents.
    """
    html_fixture = "conservativehq.com-1_full_page.html"
    json_fixture = "conservativehq.com-1_simple_article_from_full_page.json"
    check_extract_article(html_fixture, json_fixture)
Ejemplo n.º 8
0
def test_extract_article_non_article():
    """Run the article check on the non-article fixture with default options.

    Only the two fixture names are passed to ``check_extract_article``.
    """
    html_fixture = "non_article_full_page.html"
    json_fixture = "non_article_full_page.json"
    check_extract_article(html_fixture, json_fixture)
Ejemplo n.º 9
0
def test_extract_simple_article_with_readability_js():
    """Run the article check on the plain-content fixture via Readability.js.

    Calls ``check_extract_article`` with the two fixture names and
    ``use_readability_js=True``.
    """
    html_fixture = "plain-content-test_full_article.html"
    json_fixture = "plain-content-test_full_article_javascript.json"
    check_extract_article(html_fixture, json_fixture, use_readability_js=True)
Ejemplo n.º 10
0
def test_extract_article_from_page_with_readability_js():
    """Run the article check on a full-page fixture via Readability.js.

    Calls ``check_extract_article`` with the two fixture names and
    ``use_readability_js=True``.
    """
    html_fixture = "addictinginfo.com-1_full_page.html"
    json_fixture = "addictinginfo.com-1_full_page_javascript.json"
    check_extract_article(html_fixture, json_fixture, use_readability_js=True)