Esempio n. 1
0
def test_refresh_noscript():
    """A meta refresh tag yields a refresh facet; inside <noscript> the key ends in '-noscript'."""
    refresh_tag = '<meta http-equiv="refresh" content="0; url=http://www.ExampleOnly.com/"/>'
    wrapped_tag = '<noscript>{}</noscript>'.format(refresh_tag)
    target = '0; url=http://www.ExampleOnly.com/'

    # Bare tag.
    soup = BeautifulSoup(refresh_tag, 'lxml')
    found = facet.find_head_facets(refresh_tag, soup)
    assert found == [('meta-http-equiv-refresh', target)]

    # Same tag wrapped in <noscript>.
    soup = BeautifulSoup(wrapped_tag, 'lxml')
    found = facet.find_head_facets(wrapped_tag, soup)
    assert found == [('meta-http-equiv-refresh-noscript', target)]
Esempio n. 2
0
def test_refresh_noscript():
    """Compare the facet for a bare meta refresh tag with the one for the same tag in <noscript>."""
    bare = '<meta http-equiv="refresh" content="0; url=http://www.ExampleOnly.com/"/>'
    in_noscript = ''.join(['<noscript>', bare, '</noscript>'])
    content = '0; url=http://www.ExampleOnly.com/'

    expected_bare = [('meta-http-equiv-refresh', content)]
    expected_noscript = [('meta-http-equiv-refresh-noscript', content)]

    assert facet.find_head_facets(bare) == expected_bare
    assert facet.find_head_facets(in_noscript) == expected_noscript
Esempio n. 3
0
def test_twitter():
    """Twitter meta tags become meta-name/meta-property facets; facet_dedup leaves them as they are."""
    markup = '''
    <meta property="twitter:card" content="summary_large_image" />
    <meta property="twitter:site" content="@ABC" />
    <meta property="twitter:creator" content="@brianross" />
    <meta name="twitter:app:id:iphone" content="300255638" />
    <meta name="twitter:app:url:iphone" content="abcnewsiphone://link/story,42263092" />
    <meta name="twitter:app:name:ipad" content="ABC News" />
    <meta name="twitter:app:id:ipad" content="306934135" />
    '''
    # The expectation lists name-based entries ahead of property-based ones,
    # although the markup puts the property tags first.
    expected = [
        ('meta-name-twitter:app:id:iphone', '300255638'),
        ('meta-name-twitter:app:url:iphone', 'abcnewsiphone://link/story,42263092'),
        ('meta-name-twitter:app:name:ipad', 'ABC News'),
        ('meta-name-twitter:app:id:ipad', '306934135'),
        ('meta-property-twitter:card', 'summary_large_image'),
        ('meta-property-twitter:site', '@ABC'),
        ('meta-property-twitter:creator', '@brianross'),
    ]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected

    # The same list is expected back from de-duplication.
    deduped = facet.facet_dedup(found)
    assert deduped == expected
Esempio n. 4
0
def test_misc():
    """Exercise three small snippets: html lang, html xml:lang and base href."""
    lang_html = '''
    <html lang="fr">
    '''
    assert facet.find_head_facets(lang_html) == [('html lang', 'fr')]

    xml_lang_html = '''
    <html xml:lang="fr" xmlns="http://www.w3.org/1999/xhtml">
    '''
    assert facet.find_head_facets(xml_lang_html) == [('html xml:lang', 'fr')]

    base_html = '''
    <base href="http://example.com/">
    '''
    assert facet.find_head_facets(base_html) == [('base', 'http://example.com/')]
Esempio n. 5
0
def test_twitter():
    """Check the facets produced for twitter:* meta tags, before and after facet_dedup."""
    html = '''
    <meta property="twitter:card" content="summary_large_image" />
    <meta property="twitter:site" content="@ABC" />
    <meta property="twitter:creator" content="@brianross" />
    <meta name="twitter:app:id:iphone" content="300255638" />
    <meta name="twitter:app:url:iphone" content="abcnewsiphone://link/story,42263092" />
    <meta name="twitter:app:name:ipad" content="ABC News" />
    <meta name="twitter:app:id:ipad" content="306934135" />
    '''
    # Facets expected for the <meta name=...> tags.
    by_name = [
        ('meta-name-twitter:app:id:iphone', '300255638'),
        ('meta-name-twitter:app:url:iphone', 'abcnewsiphone://link/story,42263092'),
        ('meta-name-twitter:app:name:ipad', 'ABC News'),
        ('meta-name-twitter:app:id:ipad', '306934135'),
    ]
    # Facets expected for the <meta property=...> tags.
    by_property = [
        ('meta-property-twitter:card', 'summary_large_image'),
        ('meta-property-twitter:site', '@ABC'),
        ('meta-property-twitter:creator', '@brianross'),
    ]
    wanted = by_name + by_property

    parsed = BeautifulSoup(html, 'lxml')
    result = facet.find_head_facets(html, parsed)
    assert result == wanted

    # De-duplication is expected to return an equal list.
    result = facet.facet_dedup(result)
    assert result == wanted
Esempio n. 6
0
def test_google_stuff():
    """Feed three Google-hosted <script> tags to find_head_facets."""
    scripts_html = '''
    <script type="text/javascript" defer="defer" async="async" src="//www.google-analytics.com/analytics.js?oeorvp"></script>
    <script src="http://www.google.com/adsense/domains/caf.js"></script>
    <script type="text/javascript" src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
    '''
    found = facet.find_head_facets(scripts_html)
    # NOTE(review): 'foo' looks like a placeholder expectation rather than a
    # real facet list -- confirm the intended value before relying on this test.
    expected = 'foo'
    assert found == expected
Esempio n. 7
0
def test_applinks():  # fb + Parse
    """Each of the three al:ios:* meta properties is expected to give an ('applinks', True) facet."""
    applinks_html = '''
    <meta property="al:ios:url" content="applinks://docs" />
    <meta property="al:ios:app_store_id" content="12345" />
    <meta property="al:ios:app_name" content="App Links" />
    '''
    # One identical entry per tag in the markup.
    expected = [('applinks', True)] * 3

    found = facet.find_head_facets(applinks_html)
    assert found == expected
Esempio n. 8
0
def test_misc_meta_name():
    """A parsely-title meta tag is expected as a 'meta-name-parsely-title' facet with its content."""
    parsely_html = '''
    <meta name="parsely-title" content="Postmates Secures $141 Million in a ‘Super, Super Difficult’ Fundraising Effort">
    '''
    # The title contains typographic quotes, which must come through unchanged.
    title = 'Postmates Secures $141 Million in a ‘Super, Super Difficult’ Fundraising Effort'

    found = facet.find_head_facets(parsely_html)
    assert found == [('meta-name-parsely-title', title)]
Esempio n. 9
0
def test_google_stuff():
    """Parse three Google-hosted <script> tags and pass them to find_head_facets."""
    scripts_html = '''
    <script type="text/javascript" defer="defer" async="async" src="//www.google-analytics.com/analytics.js?oeorvp"></script>
    <script src="http://www.google.com/adsense/domains/caf.js"></script>
    <script type="text/javascript" src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
    '''
    soup = BeautifulSoup(scripts_html, 'lxml')
    found = facet.find_head_facets(scripts_html, soup)
    # NOTE(review): 'foo' looks like a placeholder expectation rather than a
    # real facet list -- confirm the intended value before relying on this test.
    expected = 'foo'
    assert found == expected
Esempio n. 10
0
def test_misc_meta_name():
    """Check the facet produced for a single <meta name="parsely-title"> tag."""
    page = '''
    <meta name="parsely-title" content="Postmates Secures $141 Million in a ‘Super, Super Difficult’ Fundraising Effort">
    '''
    # The content carries typographic quotes; the expectation keeps them as-is.
    wanted = [
        ('meta-name-parsely-title',
         'Postmates Secures $141 Million in a ‘Super, Super Difficult’ Fundraising Effort'),
    ]

    parsed = BeautifulSoup(page, 'lxml')
    assert facet.find_head_facets(page, parsed) == wanted
Esempio n. 11
0
def test_misc():
    """Run the html lang, html xml:lang and base href snippets through find_head_facets."""
    lang_html = '''
    <html lang="fr">
    '''
    xml_lang_html = '''
    <html xml:lang="fr" xmlns="http://www.w3.org/1999/xhtml">
    '''
    base_html = '''
    <base href="http://example.com/">
    '''
    # (snippet, expected facets) pairs, checked in this order.
    cases = (
        (lang_html, [('html lang', 'fr')]),
        (xml_lang_html, [('html xml:lang', 'fr')]),
        (base_html, [('base', 'http://example.com/')]),
    )

    for snippet, expected in cases:
        soup = BeautifulSoup(snippet, 'lxml')
        found = facet.find_head_facets(snippet, soup)
        assert found == expected
Esempio n. 12
0
def test_applinks():  # fb + Parse
    """App Links al:ios:* meta properties are expected as 'meta-property-al:ios:*' facets."""
    applinks_html = '''
    <meta property="al:ios:url" content="applinks://docs" />
    <meta property="al:ios:app_store_id" content="12345" />
    <meta property="al:ios:app_name" content="App Links" />
    '''
    expected = [
        ('meta-property-al:ios:url', 'applinks://docs'),
        ('meta-property-al:ios:app_store_id', '12345'),
        ('meta-property-al:ios:app_name', 'App Links'),
    ]

    soup = BeautifulSoup(applinks_html, 'lxml')
    found = facet.find_head_facets(applinks_html, soup)
    assert found == expected
Esempio n. 13
0
def test_applinks():  # fb + Parse
    """Check the facets expected for three al:ios:* meta property tags."""
    page = '''
    <meta property="al:ios:url" content="applinks://docs" />
    <meta property="al:ios:app_store_id" content="12345" />
    <meta property="al:ios:app_name" content="App Links" />
    '''
    # (property, content) pairs in markup order; each expected key carries
    # the 'meta-property-' prefix.
    pairs = [
        ('al:ios:url', 'applinks://docs'),
        ('al:ios:app_store_id', '12345'),
        ('al:ios:app_name', 'App Links'),
    ]
    wanted = [('meta-property-' + prop, content) for prop, content in pairs]

    parsed = BeautifulSoup(page, 'lxml')
    assert facet.find_head_facets(page, parsed) == wanted
Esempio n. 14
0
def test_generator():
    """Four generator meta tags are each expected as a separate 'meta-name-generator' facet."""
    generators_html = '''
    <meta name="generator" content="WordPress 2.5.1" />
    <meta name="generator" content="Movable Type 3.33" />
    <meta name="generator" content="Movable Type Publishing Platform 4.01" />
    <meta name="generator" content="Drupal 7 (http://drupal.org)" />
    '''
    # Contents in markup order; all share the same facet key.
    contents = [
        'WordPress 2.5.1',
        'Movable Type 3.33',
        'Movable Type Publishing Platform 4.01',
        'Drupal 7 (http://drupal.org)',
    ]
    expected = [('meta-name-generator', content) for content in contents]

    soup = BeautifulSoup(generators_html, 'lxml')
    found = facet.find_head_facets(generators_html, soup)
    assert found == expected
Esempio n. 15
0
def test_generator():
    """Check that repeated generator meta tags are all reported, in markup order."""
    page = '''
    <meta name="generator" content="WordPress 2.5.1" />
    <meta name="generator" content="Movable Type 3.33" />
    <meta name="generator" content="Movable Type Publishing Platform 4.01" />
    <meta name="generator" content="Drupal 7 (http://drupal.org)" />
    '''
    wanted = [
        ('meta-name-generator', 'WordPress 2.5.1'),
        ('meta-name-generator', 'Movable Type 3.33'),
        ('meta-name-generator', 'Movable Type Publishing Platform 4.01'),
        ('meta-name-generator', 'Drupal 7 (http://drupal.org)'),
    ]

    result = facet.find_head_facets(page)
    assert result == wanted
Esempio n. 16
0
def test_facebook():
    """Check the facets expected for Facebook/Open Graph meta and link tags."""
    facebook_html = '''
    <meta property="fb:admins" content="704409894" />
    <meta property="fb:app_id" content="4942312939" />
    <meta property="og:site_name" content="ABC News" />
    <link rel="opengraph" href="..." />
    <link rel="origin" href="..."/>
    <meta property="op:markup_version" content="v1.0">
    '''
    # Facets expected from the <meta> tags.
    meta_facets = [
        ('fb:admins', '704409894'),
        ('fb:app_id', '4942312939'),
        ('opengraph', True),
        ('fb instant', True),
    ]
    # Facets expected from the <link> tags: (href, type), where the type is
    # 'notype' for these links, which have no type attribute.
    link_facets = [
        ('opengraph', ('...', 'notype')),
        ('origin', ('...', 'notype')),
    ]

    found = facet.find_head_facets(facebook_html)
    assert found == meta_facets + link_facets
Esempio n. 17
0
def test_link_rel():
    """<link rel=...> tags are expected as 'link-rel-<rel>' facets whose value is a dict of the other attributes."""
    links_html = '''
    <link rel="amphtml" href="http://abcnews.go.com/amp/Politics/russia-trump-political-conflict-zone/story?id=42263092" />
    <link rel="alternate" type="application/rss+xml" title="App Links &raquo; FAQs Comments Feed" href="http://applinks.org/faqs/feed/" />
    <link rel="canonical" href="https://www.bloomberg.com/news/articles/2016-10-31/foo">
    '''
    amp_attrs = {
        'href': 'http://abcnews.go.com/amp/Politics/russia-trump-political-conflict-zone/story?id=42263092',
    }
    # The &raquo; entity in the title is expected decoded to U+00BB.
    alternate_attrs = {
        'href': 'http://applinks.org/faqs/feed/',
        'title': 'App Links \u00bb FAQs Comments Feed',
        'type': 'application/rss+xml',
    }
    canonical_attrs = {
        'href': 'https://www.bloomberg.com/news/articles/2016-10-31/foo',
    }
    expected = [
        ('link-rel-amphtml', amp_attrs),
        ('link-rel-alternate', alternate_attrs),
        ('link-rel-canonical', canonical_attrs),
    ]

    soup = BeautifulSoup(links_html, 'lxml')
    found = facet.find_head_facets(links_html, soup)
    assert found == expected
Esempio n. 18
0
def test_double_entries():
    """Meta names that appear twice (robots, referrer, format-detection) are expected twice in the result."""
    doubled_html = '''
    <meta name="robots" content="noarchive" />
    <meta name="robots" content="index, follow" />
    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    <meta charset="utf-8">
    <meta name="referrer" content="unsafe-url">
    <meta name="referrer" content="always">
    <meta name="format-detection" content="telephone=no"/>
    <meta name="format-detection" content="email=no"/>
    '''
    # The content-type and charset tags have no entry in the expected list.
    expected = [
        ('robots', 'noarchive'),
        ('robots', 'index, follow'),
        ('referrer', 'unsafe-url'),
        ('referrer', 'always'),
        ('format-detection', 'telephone=no'),
        ('format-detection', 'email=no'),
    ]

    found = facet.find_head_facets(doubled_html)
    assert found == expected
Esempio n. 19
0
def test_link_rel():
    """<link rel=...> tags are expected as (rel, (href, type)) facets, with 'notype' when no type is given."""
    links_html = '''
    <link rel="amphtml" href="http://abcnews.go.com/amp/Politics/russia-trump-political-conflict-zone/story?id=42263092" />
    <link rel="alternate" type="application/rss+xml" title="App Links &raquo; FAQs Comments Feed" href="http://applinks.org/faqs/feed/" />
    <link rel="canonical" href="https://www.bloomberg.com/news/articles/2016-10-31/postmates-secures-141-million-in-a-super-super-difficult-fundraising-effort">
    '''
    amp_href = 'http://abcnews.go.com/amp/Politics/russia-trump-political-conflict-zone/story?id=42263092'
    feed_href = 'http://applinks.org/faqs/feed/'
    canonical_href = 'https://www.bloomberg.com/news/articles/2016-10-31/postmates-secures-141-million-in-a-super-super-difficult-fundraising-effort'

    expected = [
        ('amphtml', (amp_href, 'notype')),
        ('alternate', (feed_href, 'application/rss+xml')),
        ('canonical', (canonical_href, 'notype')),
    ]

    found = facet.find_head_facets(links_html)
    assert found == expected
Esempio n. 20
0
def test_facebook():
    """Facebook/Open Graph tags are expected as 'meta-property-*' facets followed by 'link-rel-*' facets."""
    facebook_html = '''
    <meta property="fb:admins" content="704409894" />
    <meta property="fb:app_id" content="4942312939" />
    <meta property="og:site_name" content="ABC News" />
    <link rel="opengraph" href="..." />
    <link rel="origin" href="..."/>
    <meta property="op:markup_version" content="v1.0">
    '''
    # The expectation lists every meta facet before the link facets, although
    # the last <meta> comes after the <link> tags in the markup.
    meta_facets = [
        ('meta-property-fb:admins', '704409894'),
        ('meta-property-fb:app_id', '4942312939'),
        ('meta-property-og:site_name', 'ABC News'),
        ('meta-property-op:markup_version', 'v1.0'),
    ]
    link_facets = [
        ('link-rel-opengraph', {'href': '...'}),
        ('link-rel-origin', {'href': '...'}),
    ]

    soup = BeautifulSoup(facebook_html, 'lxml')
    found = facet.find_head_facets(facebook_html, soup)
    assert found == meta_facets + link_facets
Esempio n. 21
0
def test_integrity():
    """Tags carrying an integrity attribute are expected to be summarised as one counted facet."""
    integrity_html = '''
    <script src="https://example.com/example-framework.js"
        integrity="sha384-Li9vy3DqF8tnTXuiaAJuML3ky+er10rcgNR/VqsVpcw+ThHmYcwiB1pbOxEbzJr7"
        crossorigin="anonymous"></script>

    <script src="hello_world.js"
    integrity="sha384-dOTZf16X8p34q2/kYyEFm0jh89uTjikhnzjeLeF0FHsEaYKb1A1cv+Lyv4Hk8vHd
              sha512-Q2bFTOhEALkN8hOms2FKTDLy7eugP2zFZ1T8LCvX42Fp3WoNr3bjZSAHeOsHrbV1Fu9/A0EzCinRE7Af1ofPrw=="
    crossorigin="anonymous"></script>

    <link rel="opengraph" href="http://example.com"
        integrity="sha384-Li9vy3DqF8tnTXuiaAJuML3ky+er10rcgNR/VqsVpcw+ThHmYcwiB1pbOxEbzJr7" />
    '''
    # The link facet keeps only href; the markup has three integrity
    # attributes in total (two scripts and one link), matching the count of 3.
    link_facet = ('link-rel-opengraph', {'href': 'http://example.com'})
    integrity_facet = ('thing-script integrity', 3)

    soup = BeautifulSoup(integrity_html, 'lxml')
    found = facet.find_head_facets(integrity_html, soup)
    assert found == [link_facet, integrity_facet]
Esempio n. 22
0
def test_twitter():
    """Twitter meta tags give repeated 'twitter card' facets, which facet_dedup is expected to collapse to one."""
    twitter_html = '''
    <meta property="twitter:card" content="summary_large_image" />
    <meta property="twitter:site" content="@ABC" />
    <meta property="twitter:creator" content="@brianross" />
    <meta name="twitter:app:id:iphone" content="300255638" />
    <meta name="twitter:app:url:iphone" content="abcnewsiphone://link/story,42263092" />
    <meta name="twitter:app:name:ipad" content="ABC News" />
    <meta name="twitter:app:id:ipad" content="306934135" />
    '''
    card = ('twitter card', True)
    site_and_creator = [
        ('twitter:site', '@ABC'),
        ('twitter:creator', '@brianross'),
    ]

    # Before de-duplication the card facet is expected four times.
    found = facet.find_head_facets(twitter_html)
    assert found == [card] * 4 + site_and_creator

    # After de-duplication only one card facet should remain.
    deduped = facet.facet_dedup(found)
    assert deduped == [card] + site_and_creator
Esempio n. 23
0
def test_double_entries():
    """Repeated meta names are expected once per tag, with the http-equiv facet listed last."""
    doubled_html = '''
    <meta name="robots" content="noarchive" />
    <meta name="robots" content="index, follow" />
    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    <meta charset="utf-8">
    <meta name="referrer" content="unsafe-url">
    <meta name="referrer" content="always">
    <meta name="format-detection" content="telephone=no"/>
    <meta name="format-detection" content="email=no"/>
    '''
    # Name-based facets in markup order.
    name_facets = [
        ('meta-name-robots', 'noarchive'),
        ('meta-name-robots', 'index, follow'),
        ('meta-name-referrer', 'unsafe-url'),
        ('meta-name-referrer', 'always'),
        ('meta-name-format-detection', 'telephone=no'),
        ('meta-name-format-detection', 'email=no'),
    ]
    # The http-equiv facet comes after all name-based ones in the expectation;
    # the <meta charset> tag has no entry at all.
    http_equiv_facets = [
        ('meta-http-equiv-content-type', 'text/html; charset=utf-8'),
    ]

    soup = BeautifulSoup(doubled_html, 'lxml')
    found = facet.find_head_facets(doubled_html, soup)
    assert found == name_facets + http_equiv_facets