예제 #1
0
 def test_extract_links(self):
     """Check the default behaviour of ``extract_links`` on a small snippet.

     The markup carries the same URL in an ``<a>`` and a ``<b>`` element,
     plus a second URL in another ``<a>``. The expected result lists each
     URL once, in order of appearance.
     """
     response = '<a>http://tomato.org/test</a><b>http://tomato.org/test</b><a>http://tomato.com/test</a>'
     links = extract_links(response)
     # assertEqual is the supported spelling; the assertEquals alias was
     # deprecated for years and removed in Python 3.12.
     self.assertEqual(links, ['http://tomato.org/test', 'http://tomato.com/test'])
예제 #2
0
 def test_extract_links_with_blacklist_extensions(self):
     """Check that ``blacklist_extensions`` removes matching links.

     With ``['.org']`` blacklisted, only the ``tomato.com`` URL is expected
     to be returned.
     """
     # NOTE(review): '.org' is a TLD in these URLs rather than a file
     # extension; how extract_links matches it is not visible here.
     response = '<a>http://tomato.org/test</a><b>http://tomato.org/test</b><a>http://tomato.com/test</a>'
     links = extract_links(response, blacklist_extensions=['.org'])
     # assertEqual is the supported spelling; the assertEquals alias was
     # deprecated for years and removed in Python 3.12.
     self.assertEqual(links, ['http://tomato.com/test'])
예제 #3
0
 def test_extract_links_with_regex(self):
     """Check that the ``regex`` argument restricts the returned links.

     With the pattern ``tomato\\.org`` only the ``tomato.org`` URL is
     expected, and it is expected exactly once.
     """
     response = '<a>http://tomato.org/test</a><b>http://tomato.org/test</b><a>http://tomato.com/test</a>'
     # Raw string: '\.' in a plain literal is an invalid escape sequence and
     # triggers a warning on modern Python. The pattern value is unchanged.
     links = extract_links(response, regex=r'tomato\.org')
     # assertEqual is the supported spelling; the assertEquals alias was
     # deprecated for years and removed in Python 3.12.
     self.assertEqual(links, ['http://tomato.org/test'])
예제 #4
0
 def test_extract_links_with_whitelist_domains(self):
     """Check that ``whitelist_domains`` keeps only links on listed domains.

     With ``['tomato.org']`` whitelisted, only the ``tomato.org`` URL is
     expected to be returned.
     """
     response = '<a>http://tomato.org/test</a><b>http://tomato.org/test</b><a>http://tomato.com/test</a>'
     links = extract_links(response, whitelist_domains=['tomato.org'])
     # assertEqual is the supported spelling; the assertEquals alias was
     # deprecated for years and removed in Python 3.12.
     self.assertEqual(links, ['http://tomato.org/test'])
예제 #5
0
 def test_extract_links_common_white_and_black_lists(self):
     """Check that overlapping white and black lists are rejected.

     Passing the same entry in both ``whitelist_domains`` and
     ``blacklist_domains`` must raise ``LinkExtractorException``.
     """
     markup = '<a>http://tomato.org/test</a><b>http://tomato.org/test</b><a>http://tomato.com/test</a>'
     # Callable form of assertRaises: the call is made by the assertion
     # itself with the arguments given after the callable.
     self.assertRaises(
         LinkExtractorException,
         extract_links,
         markup,
         whitelist_domains=['test'],
         blacklist_domains=['test'],
     )