Example #1
0
 def test_uri_stripped_of_whitespace_and_quote_characters_correctly(self):
     # Each raw value is padded with whitespace and/or wrapped in quote
     # characters. Running extract() and then adapt() on it must give the
     # clean absolute URL listed next to it, for both processor types.
     cases = [
         (u' image.jpg ', 'http://www.example.com/images/image.jpg'),
         (u"    '/data.jpg'", 'http://www.example.com/data.jpg'),
         (u'\n\t"file.jpg"\n\t\t', 'http://www.example.com/images/file.jpg'),
     ]
     page = HtmlPage(url=u"http://www.example.com/images/",
                     body=u'<html><body></body></html>',
                     encoding='utf-8')
     url_processor = UrlFieldTypeProcessor()
     image_processor = ImagesFieldTypeProcessor()
     for raw, expected in cases:
         # The image processor is checked before the URL processor.
         for processor in (image_processor, url_processor):
             extracted = processor.extract(raw)
             self.assertEqual(processor.adapt(extracted, page), expected)
Example #2
0
 def test_uri_with_illegal_html_entities(self):
     # The raw values mix numeric character references (&#9;, &#10, &#11;,
     # &#0;, &#15;, &#14;) in with whitespace and quotes. The expected
     # URLs contain none of them: extract() followed by adapt() must give
     # the clean absolute URL for both processor types.
     cases = [
         (u'&#9;&#10 image.jpg ',
          'http://www.example.com/images/image.jpg'),
         (u"    '/&#11;&#0;data.jpg'",
          'http://www.example.com/data.jpg'),
         (u'&#15;\n\t"&#14;file.jpg"\n\t\t',
          'http://www.example.com/images/file.jpg'),
     ]
     page = HtmlPage(url=u"http://www.example.com/images/",
                     body=u'<html><body></body></html>',
                     encoding='utf-8')
     url_processor = UrlFieldTypeProcessor()
     image_processor = ImagesFieldTypeProcessor()
     for raw, expected in cases:
         # The image processor is checked before the URL processor.
         for processor in (image_processor, url_processor):
             extracted = processor.extract(raw)
             self.assertEqual(processor.adapt(extracted, page), expected)
Example #3
0
 def test_blank_image_url(self):
     # Extracting from a single-space value must give the empty string.
     extracted = ImagesFieldTypeProcessor().extract(' ')
     assert extracted == ''