Beispiel #1
0
 def test_should_not_follow_mailtos(self):
     # A page whose only anchor uses the mailto: scheme must produce an
     # empty result from extract_domains.
     markup = '<a href="mailto:[email protected]">[email protected]</a>'
     self.assertEqual(len(extract_domains(markup)), 0)
Beispiel #2
0
 def test_should_handle_webm_link(self):
     # The single anchor is a protocol-relative ("//host/...") link to a
     # .webm media file; the test expects nothing to be extracted from it.
     markup = '<a href="//upload.wikimedia.org/wikipedia/commons/4/4e/Plasma_globe_23s.webm" title="Play media" target="new"><span class="play-btn-large"><span class="mw-tmh-playtext">Play media</span></span></a>'
     extracted = extract_domains(markup)
     self.assertEqual(len(extracted), 0)
Beispiel #3
0
 def test_should_extract_hrefs_from_a_tags(self):
     # Two anchors pointing at two different absolute URLs should yield
     # exactly two extracted entries.
     markup = '<a href="http://example.com">text</a><a href="http://example2.com">text</a>'
     expected_count = 2
     extracted = extract_domains(markup)
     self.assertEqual(len(extracted), expected_count)
Beispiel #4
0
 def test_should_ignore_duplicate_links_that_are_page_anchors(self):
     # Both anchors reference the same page; the second differs only by a
     # "#anchor" fragment, so the pair is expected to count as one entry.
     markup = '<a href="http://example2.com/index.html">text</a><a href="http://example2.com/index.html#anchor">text</a>'
     self.assertEqual(len(extract_domains(markup)), 1)
Beispiel #5
0
 def test_should_extract_relative_hrefs_from_a_tag(self):
     # A single anchor should come back as exactly one entry whose value
     # equals the href verbatim.
     # NOTE(review): the test name says "relative", but the href used
     # here is an absolute URL - confirm whether a relative-href fixture
     # was intended.
     markup = '<a href="http://example.com">text</a>'
     extracted = extract_domains(markup)
     self.assertEqual(len(extracted), 1)
     first_entry = extracted[0]
     self.assertEqual(first_entry, "http://example.com")