Esempio n. 1
0
def test_extract_title_removes_unwanted_characters():
    """The ``&#8217;`` entity in an og:title value is expected back as a literal ’."""
    wanted_title = "Trump Denies Charitable Donation He Promised If Elizabeth Warren Releases DNA Results And It’s On Video"
    # Only an og:title <meta> tag is present, so it is the sole title source here.
    page_markup = """
        <meta property="og:title" content="Trump Denies Charitable Donation He Promised If Elizabeth Warren Releases DNA Results And It&#8217;s On Video" />
    """

    extracted = extract_title(page_markup)

    assert extracted == wanted_title
Esempio n. 2
0
def test_extract_title_pick_shortest_version_of_equivalent_title():
    """Given an <h1> title and an og:title repeating it with a " - Geller Report"
    suffix, the shorter <h1> text is the expected result.
    """
    wanted_title = "Pamela Geller in Breitbart News: Dueling Billboards from CAIR, AFDI in Times Square"
    # Both tags carry the same headline; only the og:title has the site-name suffix.
    page_markup = """
            <h1 class="entry-title">Pamela Geller in Breitbart News: Dueling Billboards from CAIR, AFDI in Times Square</h1>
            <meta property="og:title" content="Pamela Geller in Breitbart News: Dueling Billboards from CAIR, AFDI in Times Square - Geller Report" />
        """

    extracted = extract_title(page_markup)

    assert extracted == wanted_title
Esempio n. 3
0
def test_extract_title_prioritises_highest_score_xpath():
    """With several heading candidates present, the ``h1.entry-title`` text is
    the expected title.

    NOTE(review): the test name implies extract_title ranks XPath matches by a
    score; that ranking lives outside this block -- confirm against the
    implementation.
    """
    wanted_title = "Example title"
    # Three competing headings (h2.title, h1.entry-title, header > h1) plus a paragraph.
    page_markup = """
            <h2 class="title">Silly title</h2>
            <h1 class="entry-title">Example title</h1>
            <header><h1>Bad title</h1></header>
            <p>Hello world</p>
    """

    extracted = extract_title(page_markup)

    assert extracted == wanted_title
Esempio n. 4
0
def test_extract_title_gets_text_within_hyperlinks():
    """A title wrapped in an <a> inside ``h1.entry-title`` is expected back as
    one line: the link text, with its internal line break collapsed and the
    ``&#8217;`` entity rendered as ’.
    """
    wanted_title = "Trump Denies Charitable Donation He Promised If Elizabeth Warren Releases DNA Results And It’s On Video"
    # The anchor text is deliberately split across two source lines.
    page_markup = """
        <h1 class="entry-title">
            <a href="http://addictinginfo.com/2018/10/15/trump-denies-charitable-donation-he-promised-if-elizabeth-warren-releases-dna-results-and-its-on-video/"
                title="Permalink to Trump Denies Charitable Donation He Promised If Elizabeth Warren Releases DNA Results And It&#8217;s On Video"
                rel="bookmark">Trump Denies Charitable Donation He Promised If Elizabeth Warren Releases DNA Results And
                It&#8217;s On Video</a>
        </h1>
    """

    extracted = extract_title(page_markup)

    assert extracted == wanted_title
Esempio n. 5
0
def test_extract_title(html, expected):
    """Check that ``extract_title(html)`` equals ``expected``.

    NOTE(review): ``html`` and ``expected`` are presumably injected by a
    parametrize decorator or fixtures that are not visible here -- confirm.
    """
    extracted = extract_title(html)
    assert extracted == expected