def test_extraction(self):
    """Check the links extracted for several ``locations``/``unique`` settings."""

    def as_links(pairs):
        # Turn (url, text) pairs into the Link objects the extractor should yield.
        return [Link(url=url, text=text) for url, text in pairs]

    # Every <img>, default settings: sample4 is expected only once.
    img_links = HTMLImageLinkExtractor(locations=('//img', )).extract_links(self.response)
    self.assertEqual(img_links, as_links([
        ('http://example.com/sample1.jpg', u'sample 1'),
        ('http://example.com/sample2.jpg', u'sample 2'),
        ('http://example.com/sample4.jpg', u'sample 4'),
    ]))

    # Every <img> with unique=False: the second sample4 entry is expected too.
    repeated_links = HTMLImageLinkExtractor(
        locations=('//img', ), unique=False
    ).extract_links(self.response)
    self.assertEqual(repeated_links, as_links([
        ('http://example.com/sample1.jpg', u'sample 1'),
        ('http://example.com/sample2.jpg', u'sample 2'),
        ('http://example.com/sample4.jpg', u'sample 4'),
        ('http://example.com/sample4.jpg', u'sample 4 repetition'),
    ]))

    # Restricting the location to the wrapper <div>.
    wrapper_links = HTMLImageLinkExtractor(
        locations=('//div[@id="wrapper"]', )
    ).extract_links(self.response)
    self.assertEqual(wrapper_links, as_links([
        ('http://example.com/sample1.jpg', u'sample 1'),
        ('http://example.com/sample2.jpg', u'sample 2'),
        ('http://example.com/sample4.jpg', u'sample 4'),
    ]))

    # Using <a> elements as the location.
    anchor_links = HTMLImageLinkExtractor(locations=('//a', )).extract_links(self.response)
    self.assertEqual(anchor_links, as_links([
        ('http://example.com/sample2.jpg', u'sample 2'),
        ('http://example.com/sample3.html', u'sample 3'),
    ]))
def test_extraction(self):
    """Run the extractor with several option sets and compare the resulting links."""
    # Each case pairs the extractor keyword arguments with the (url, text)
    # of every link expected back, in order.
    cases = [
        (
            {"locations": ("//img",)},
            [
                ("http://example.com/sample1.jpg", u"sample 1"),
                ("http://example.com/sample2.jpg", u"sample 2"),
                ("http://example.com/sample4.jpg", u"sample 4"),
            ],
        ),
        (
            {"locations": ("//img",), "unique": False},
            [
                ("http://example.com/sample1.jpg", u"sample 1"),
                ("http://example.com/sample2.jpg", u"sample 2"),
                ("http://example.com/sample4.jpg", u"sample 4"),
                ("http://example.com/sample4.jpg", u"sample 4 repetition"),
            ],
        ),
        (
            {"locations": ('//div[@id="wrapper"]',)},
            [
                ("http://example.com/sample1.jpg", u"sample 1"),
                ("http://example.com/sample2.jpg", u"sample 2"),
                ("http://example.com/sample4.jpg", u"sample 4"),
            ],
        ),
        (
            {"locations": ("//a",)},
            [
                ("http://example.com/sample2.jpg", u"sample 2"),
                ("http://example.com/sample3.html", u"sample 3"),
            ],
        ),
    ]

    for options, expected_pairs in cases:
        extractor = HTMLImageLinkExtractor(**options)
        extracted = extractor.extract_links(self.response)
        self.assertEqual(
            extracted,
            [Link(url=url, text=text) for url, text in expected_pairs],
        )
def test_urls_type(self):
    """Check that every extracted url is a regular ``str``, not a unicode object."""
    extractor = HTMLImageLinkExtractor()

    urls_are_str = True
    for extracted in extractor.extract_links(self.response):
        if not isinstance(extracted.url, str):
            # Stop at the first offending link, as a short-circuiting all() would.
            urls_are_str = False
            break

    self.assertTrue(urls_are_str)