def test_refresh_noscript():
    """Expect a refresh facet for a bare meta tag and a -noscript one when wrapped.

    NOTE(review): test_refresh_noscript is defined twice in this file; confirm
    which definition is meant to run.
    """
    refresh_tag = '<meta http-equiv="refresh" content="0; url=http://www.ExampleOnly.com/"/>'
    wrapped_tag = '<noscript>' + refresh_tag + '</noscript>'
    redirect = '0; url=http://www.ExampleOnly.com/'

    cases = (
        (refresh_tag, 'meta-http-equiv-refresh'),
        (wrapped_tag, 'meta-http-equiv-refresh-noscript'),
    )
    for markup, facet_name in cases:
        soup = BeautifulSoup(markup, 'lxml')
        assert facet.find_head_facets(markup, soup) == [(facet_name, redirect)]
def test_refresh_noscript():
    """Expect a refresh facet for a bare meta tag and a -noscript one when wrapped.

    NOTE(review): test_refresh_noscript is defined twice in this file; confirm
    which definition is meant to run.
    """
    refresh_tag = '<meta http-equiv="refresh" content="0; url=http://www.ExampleOnly.com/"/>'
    wrapped_tag = '<noscript>' + refresh_tag + '</noscript>'
    redirect = '0; url=http://www.ExampleOnly.com/'

    cases = (
        (refresh_tag, 'meta-http-equiv-refresh'),
        (wrapped_tag, 'meta-http-equiv-refresh-noscript'),
    )
    for markup, facet_name in cases:
        assert facet.find_head_facets(markup) == [(facet_name, redirect)]
def test_twitter():
    """Expect one facet per twitter meta tag, unchanged by facet_dedup.

    The expected list puts the meta-name entries before the meta-property ones.

    NOTE(review): test_twitter is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <meta property="twitter:card" content="summary_large_image" />'
        ' <meta property="twitter:site" content="@ABC" />'
        ' <meta property="twitter:creator" content="@brianross" />'
        ' <meta name="twitter:app:id:iphone" content="300255638" />'
        ' <meta name="twitter:app:url:iphone" content="abcnewsiphone://link/story,42263092" />'
        ' <meta name="twitter:app:name:ipad" content="ABC News" />'
        ' <meta name="twitter:app:id:ipad" content="306934135" />'
        ' '
    )
    expected = [
        ('meta-name-twitter:app:id:iphone', '300255638'),
        ('meta-name-twitter:app:url:iphone', 'abcnewsiphone://link/story,42263092'),
        ('meta-name-twitter:app:name:ipad', 'ABC News'),
        ('meta-name-twitter:app:id:ipad', '306934135'),
        ('meta-property-twitter:card', 'summary_large_image'),
        ('meta-property-twitter:site', '@ABC'),
        ('meta-property-twitter:creator', '@brianross'),
    ]

    found = facet.find_head_facets(markup, BeautifulSoup(markup, 'lxml'))
    assert found == expected

    # Every entry is distinct, so the deduplicated list is expected to match too.
    deduped = facet.facet_dedup(found)
    assert deduped == expected
def test_misc():
    """Expect facets for html lang, html xml:lang and base href.

    NOTE(review): test_misc is defined twice in this file; confirm which
    definition is meant to run.
    """
    cases = (
        (' <html lang="fr"> ',
         [('html lang', 'fr')]),
        (' <html xml:lang="fr" xmlns="http://www.w3.org/1999/xhtml"> ',
         [('html xml:lang', 'fr')]),
        (' <base href="http://example.com/"> ',
         [('base', 'http://example.com/')]),
    )
    for markup, expected in cases:
        found = facet.find_head_facets(markup)
        assert found == expected
def test_google_stuff():
    """Run facet extraction over Google analytics/adsense/pagead script tags.

    NOTE(review): the expected value 'foo' looks like a placeholder -- confirm
    the intended facet list before relying on this test.
    NOTE(review): test_google_stuff is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <script type="text/javascript" defer="defer" async="async"'
        ' src="//www.google-analytics.com/analytics.js?oeorvp"></script>'
        ' <script src="http://www.google.com/adsense/domains/caf.js"></script>'
        ' <script type="text/javascript"'
        ' src="http://pagead2.googlesyndication.com/pagead/show_ads.js">'
        ' '
    )
    found = facet.find_head_facets(markup)
    assert found == 'foo'
def test_applinks():
    """Expect each al:ios:* meta property to yield an ('applinks', True) facet.

    NOTE(review): test_applinks is defined twice in this file; confirm which
    definition is meant to run.
    """
    # fb + Parse
    markup = (
        ' <meta property="al:ios:url" content="applinks://docs" />'
        ' <meta property="al:ios:app_store_id" content="12345" />'
        ' <meta property="al:ios:app_name" content="App Links" />'
        ' '
    )
    # One entry per tag; no deduplication is expected at this stage.
    expected = [('applinks', True)] * 3

    found = facet.find_head_facets(markup)
    assert found == expected
def test_misc_meta_name():
    """Expect an arbitrary meta name to come back as a meta-name-<name> facet.

    NOTE(review): test_misc_meta_name is defined twice in this file; confirm
    which definition is meant to run.
    """
    title = 'Postmates Secures $141 Million in a ‘Super, Super Difficult’ Fundraising Effort'
    markup = ' <meta name="parsely-title" content="' + title + '"> '

    found = facet.find_head_facets(markup)
    assert found == [('meta-name-parsely-title', title)]
def test_google_stuff():
    """Run facet extraction over Google analytics/adsense/pagead script tags.

    NOTE(review): the expected value 'foo' looks like a placeholder -- confirm
    the intended facet list before relying on this test.
    NOTE(review): test_google_stuff is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <script type="text/javascript" defer="defer" async="async"'
        ' src="//www.google-analytics.com/analytics.js?oeorvp"></script>'
        ' <script src="http://www.google.com/adsense/domains/caf.js"></script>'
        ' <script type="text/javascript"'
        ' src="http://pagead2.googlesyndication.com/pagead/show_ads.js">'
        ' '
    )
    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == 'foo'
def test_misc_meta_name():
    """Expect an arbitrary meta name to come back as a meta-name-<name> facet.

    NOTE(review): test_misc_meta_name is defined twice in this file; confirm
    which definition is meant to run.
    """
    title = 'Postmates Secures $141 Million in a ‘Super, Super Difficult’ Fundraising Effort'
    markup = ' <meta name="parsely-title" content="' + title + '"> '

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == [('meta-name-parsely-title', title)]
def test_misc():
    """Expect facets for html lang, html xml:lang and base href.

    NOTE(review): test_misc is defined twice in this file; confirm which
    definition is meant to run.
    """
    cases = (
        (' <html lang="fr"> ',
         [('html lang', 'fr')]),
        (' <html xml:lang="fr" xmlns="http://www.w3.org/1999/xhtml"> ',
         [('html xml:lang', 'fr')]),
        (' <base href="http://example.com/"> ',
         [('base', 'http://example.com/')]),
    )
    for markup, expected in cases:
        soup = BeautifulSoup(markup, 'lxml')
        found = facet.find_head_facets(markup, soup)
        assert found == expected
def test_applinks():
    """Expect each al:ios:* meta property to yield a meta-property-<name> facet.

    NOTE(review): test_applinks is defined twice in this file; confirm which
    definition is meant to run.
    """
    # fb + Parse
    markup = (
        ' <meta property="al:ios:url" content="applinks://docs" />'
        ' <meta property="al:ios:app_store_id" content="12345" />'
        ' <meta property="al:ios:app_name" content="App Links" />'
        ' '
    )
    expected = [
        ('meta-property-al:ios:url', 'applinks://docs'),
        ('meta-property-al:ios:app_store_id', '12345'),
        ('meta-property-al:ios:app_name', 'App Links'),
    ]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected
def test_generator():
    """Expect one meta-name-generator facet per generator tag, in document order.

    NOTE(review): test_generator is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <meta name="generator" content="WordPress 2.5.1" />'
        ' <meta name="generator" content="Movable Type 3.33" />'
        ' <meta name="generator" content="Movable Type Publishing Platform 4.01" />'
        ' <meta name="generator" content="Drupal 7 (http://drupal.org)" />'
        ' '
    )
    generators = (
        'WordPress 2.5.1',
        'Movable Type 3.33',
        'Movable Type Publishing Platform 4.01',
        'Drupal 7 (http://drupal.org)',
    )
    expected = [('meta-name-generator', name) for name in generators]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected
def test_generator():
    """Expect one meta-name-generator facet per generator tag, in document order.

    NOTE(review): test_generator is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <meta name="generator" content="WordPress 2.5.1" />'
        ' <meta name="generator" content="Movable Type 3.33" />'
        ' <meta name="generator" content="Movable Type Publishing Platform 4.01" />'
        ' <meta name="generator" content="Drupal 7 (http://drupal.org)" />'
        ' '
    )
    generators = (
        'WordPress 2.5.1',
        'Movable Type 3.33',
        'Movable Type Publishing Platform 4.01',
        'Drupal 7 (http://drupal.org)',
    )
    expected = [('meta-name-generator', name) for name in generators]

    found = facet.find_head_facets(markup)
    assert found == expected
def test_facebook():
    """Expect the Facebook/OpenGraph facets asserted below for fb:, og:, op: and link tags.

    NOTE(review): test_facebook is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <meta property="fb:admins" content="704409894" />'
        ' <meta property="fb:app_id" content="4942312939" />'
        ' <meta property="og:site_name" content="ABC News" />'
        ' <link rel="opengraph" href="..." />'
        ' <link rel="origin" href="..."/>'
        ' <meta property="op:markup_version" content="v1.0">'
        ' '
    )
    # Link facets carry an (href, type) pair; 'notype' appears where the tag has no type.
    placeholder_link = ('...', 'notype')
    expected = [
        ('fb:admins', '704409894'),
        ('fb:app_id', '4942312939'),
        ('opengraph', True),
        ('fb instant', True),
        ('opengraph', placeholder_link),
        ('origin', placeholder_link),
    ]

    found = facet.find_head_facets(markup)
    assert found == expected
def test_link_rel():
    """Expect a link-rel-<rel> facet per link tag, each carrying a dict of attributes.

    NOTE(review): test_link_rel is defined twice in this file; confirm which
    definition is meant to run.
    """
    amp_url = ('http://abcnews.go.com/amp/Politics/'
               'russia-trump-political-conflict-zone/story?id=42263092')
    feed_url = 'http://applinks.org/faqs/feed/'
    canonical_url = 'https://www.bloomberg.com/news/articles/2016-10-31/foo'

    markup = (
        ' <link rel="amphtml" href="' + amp_url + '" />'
        ' <link rel="alternate" type="application/rss+xml"'
        ' title="App Links » FAQs Comments Feed" href="' + feed_url + '" />'
        ' <link rel="canonical" href="' + canonical_url + '">'
        ' '
    )
    expected = [
        ('link-rel-amphtml', {'href': amp_url}),
        ('link-rel-alternate', {'href': feed_url,
                                'title': 'App Links \u00bb FAQs Comments Feed',
                                'type': 'application/rss+xml'}),
        ('link-rel-canonical', {'href': canonical_url}),
    ]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected
def test_double_entries():
    """Expect repeated meta names (robots, referrer, format-detection) to each be kept.

    The expected list has no entry for the content-type or charset tags.

    NOTE(review): test_double_entries is defined twice in this file; confirm
    which definition is meant to run.
    """
    markup = (
        ' <meta name="robots" content="noarchive" />'
        ' <meta name="robots" content="index, follow" />'
        ' <meta http-equiv="content-type" content="text/html; charset=utf-8">'
        ' <meta charset="utf-8">'
        ' <meta name="referrer" content="unsafe-url">'
        ' <meta name="referrer" content="always">'
        ' <meta name="format-detection" content="telephone=no"/>'
        ' <meta name="format-detection" content="email=no"/>'
        ' '
    )
    expected = [
        ('robots', 'noarchive'),
        ('robots', 'index, follow'),
        ('referrer', 'unsafe-url'),
        ('referrer', 'always'),
        ('format-detection', 'telephone=no'),
        ('format-detection', 'email=no'),
    ]

    found = facet.find_head_facets(markup)
    assert found == expected
def test_link_rel():
    """Expect a facet per link rel, each carrying an (href, type) pair.

    NOTE(review): test_link_rel is defined twice in this file; confirm which
    definition is meant to run.
    """
    amp_url = ('http://abcnews.go.com/amp/Politics/'
               'russia-trump-political-conflict-zone/story?id=42263092')
    feed_url = 'http://applinks.org/faqs/feed/'
    canonical_url = ('https://www.bloomberg.com/news/articles/2016-10-31/'
                     'postmates-secures-141-million-in-a-super-super-difficult-fundraising-effort')

    markup = (
        ' <link rel="amphtml" href="' + amp_url + '" />'
        ' <link rel="alternate" type="application/rss+xml"'
        ' title="App Links » FAQs Comments Feed" href="' + feed_url + '" />'
        ' <link rel="canonical" href="' + canonical_url + '">'
        ' '
    )
    # 'notype' is the expected second element for links without a type attribute.
    expected = [
        ('amphtml', (amp_url, 'notype')),
        ('alternate', (feed_url, 'application/rss+xml')),
        ('canonical', (canonical_url, 'notype')),
    ]

    found = facet.find_head_facets(markup)
    assert found == expected
def test_facebook():
    """Expect meta-property facets first, then link-rel facets, for Facebook/OpenGraph tags.

    NOTE(review): test_facebook is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <meta property="fb:admins" content="704409894" />'
        ' <meta property="fb:app_id" content="4942312939" />'
        ' <meta property="og:site_name" content="ABC News" />'
        ' <link rel="opengraph" href="..." />'
        ' <link rel="origin" href="..."/>'
        ' <meta property="op:markup_version" content="v1.0">'
        ' '
    )
    expected = [
        ('meta-property-fb:admins', '704409894'),
        ('meta-property-fb:app_id', '4942312939'),
        ('meta-property-og:site_name', 'ABC News'),
        ('meta-property-op:markup_version', 'v1.0'),
        ('link-rel-opengraph', {'href': '...'}),
        ('link-rel-origin', {'href': '...'}),
    ]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected
def test_integrity():
    """Expect a link-rel facet plus a 'thing-script integrity' count of 3.

    The markup has three integrity attributes: two on script tags and one on a link tag.
    """
    sha384 = 'sha384-Li9vy3DqF8tnTXuiaAJuML3ky+er10rcgNR/VqsVpcw+ThHmYcwiB1pbOxEbzJr7'
    markup = (
        ' <script src="https://example.com/example-framework.js"'
        ' integrity="' + sha384 + '"'
        ' crossorigin="anonymous"></script>'
        ' <script src="hello_world.js"'
        ' integrity="sha384-dOTZf16X8p34q2/kYyEFm0jh89uTjikhnzjeLeF0FHsEaYKb1A1cv+Lyv4Hk8vHd'
        ' sha512-Q2bFTOhEALkN8hOms2FKTDLy7eugP2zFZ1T8LCvX42Fp3WoNr3bjZSAHeOsHrbV1Fu9/A0EzCinRE7Af1ofPrw=="'
        ' crossorigin="anonymous"></script>'
        ' <link rel="opengraph" href="http://example.com"'
        ' integrity="' + sha384 + '" />'
        ' '
    )
    expected = [
        ('link-rel-opengraph', {'href': 'http://example.com'}),
        ('thing-script integrity', 3),
    ]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected
def test_twitter():
    """Expect repeated 'twitter card' facets that facet_dedup collapses to one.

    NOTE(review): test_twitter is defined twice in this file; confirm which
    definition is meant to run.
    """
    markup = (
        ' <meta property="twitter:card" content="summary_large_image" />'
        ' <meta property="twitter:site" content="@ABC" />'
        ' <meta property="twitter:creator" content="@brianross" />'
        ' <meta name="twitter:app:id:iphone" content="300255638" />'
        ' <meta name="twitter:app:url:iphone" content="abcnewsiphone://link/story,42263092" />'
        ' <meta name="twitter:app:name:ipad" content="ABC News" />'
        ' <meta name="twitter:app:id:ipad" content="306934135" />'
        ' '
    )
    card = ('twitter card', True)
    account_facets = [('twitter:site', '@ABC'), ('twitter:creator', '@brianross')]

    # Before deduplication the card marker is expected four times.
    found = facet.find_head_facets(markup)
    assert found == [card] * 4 + account_facets

    deduped = facet.facet_dedup(found)
    assert deduped == [card] + account_facets
def test_double_entries():
    """Expect repeated meta names to each be kept, followed by the http-equiv facet.

    The expected list has no entry for the bare charset tag.

    NOTE(review): test_double_entries is defined twice in this file; confirm
    which definition is meant to run.
    """
    markup = (
        ' <meta name="robots" content="noarchive" />'
        ' <meta name="robots" content="index, follow" />'
        ' <meta http-equiv="content-type" content="text/html; charset=utf-8">'
        ' <meta charset="utf-8">'
        ' <meta name="referrer" content="unsafe-url">'
        ' <meta name="referrer" content="always">'
        ' <meta name="format-detection" content="telephone=no"/>'
        ' <meta name="format-detection" content="email=no"/>'
        ' '
    )
    expected = [
        ('meta-name-robots', 'noarchive'),
        ('meta-name-robots', 'index, follow'),
        ('meta-name-referrer', 'unsafe-url'),
        ('meta-name-referrer', 'always'),
        ('meta-name-format-detection', 'telephone=no'),
        ('meta-name-format-detection', 'email=no'),
        ('meta-http-equiv-content-type', 'text/html; charset=utf-8'),
    ]

    soup = BeautifulSoup(markup, 'lxml')
    found = facet.find_head_facets(markup, soup)
    assert found == expected