Esempio n. 1
0
 def test_safe_download_url(self):
     """Check safe_download_url against a table of (input, expected) URLs."""
     cases = (
         # A parent reference above the root is expected to yield the root.
         ('http://www.scrapy.org/../',
          'http://www.scrapy.org/'),
         # Mixed parent references are expected to resolve to a single path.
         ('http://www.scrapy.org/../../images/../image',
          'http://www.scrapy.org/image'),
         # A URL without parent references is expected back unchanged.
         ('http://www.scrapy.org/dir/',
          'http://www.scrapy.org/dir/'),
     )
     for url, expected in cases:
         self.assertEqual(safe_download_url(url), expected)
Esempio n. 2
0
 def test_safe_download_url(self):
     """Verify the URLs returned by safe_download_url for three inputs."""
     # Parent reference directly above the root.
     above_root = safe_download_url('http://www.scrapy.org/../')
     self.assertEqual(above_root, 'http://www.scrapy.org/')

     # Several parent references mixed with real path segments.
     mixed = safe_download_url('http://www.scrapy.org/../../images/../image')
     self.assertEqual(mixed, 'http://www.scrapy.org/image')

     # No parent references: the test expects the same URL back.
     plain = safe_download_url('http://www.scrapy.org/dir/')
     self.assertEqual(plain, 'http://www.scrapy.org/dir/')
Esempio n. 3
0
 def adapt(self, text, htmlpage=None):
     """Turn extracted link text into a URL joined onto the page's base URL.

     Returns ``text`` untouched when ``htmlpage`` is None, and None when
     ``text`` is None. Otherwise returns the result of
     ``safe_download_url`` applied to the joined URL.
     """
     # One guard covers both cases: with a None text, returning it
     # returns None.
     if htmlpage is None or text is None:
         return text

     # Pages without an ``encoding`` attribute fall back to utf-8.
     page_encoding = getattr(htmlpage, 'encoding', 'utf-8')

     # Link side: encode, unquote markup, remove every match of the
     # module-level ``disallowed`` pattern, then strip_url.
     raw_link = unquote_markup(text.encode(page_encoding),
                               encoding=page_encoding)
     link = strip_url(disallowed.sub('', raw_link))

     # Base side: same encode / unquote / strip_url treatment, but without
     # the ``disallowed`` filtering.
     raw_base = unquote_markup(get_base_url(htmlpage).encode(page_encoding),
                               encoding=page_encoding)
     return safe_download_url(urljoin(strip_url(raw_base), link))
Esempio n. 4
0
 def adapt(self, text, htmlpage):
     """Join ``text`` onto the page's base URL and return a safe URL.

     ``text`` is encoded with ``htmlpage.encoding`` and joined to
     ``get_base_url(htmlpage)`` via ``urljoin_rfc``. The joined value goes
     through ``unquote_markup`` and then ``safe_download_url``, whose
     result is returned.
     """
     encoded_link = text.encode(htmlpage.encoding)
     page_base = get_base_url(htmlpage)
     absolute = urljoin_rfc(page_base, encoded_link)
     unquoted = unquote_markup(absolute)
     return safe_download_url(unquoted)
Esempio n. 5
0
 def adapt(self, text, htmlpage):
     """Join ``text`` onto the page's base URL and return a safe URL.

     Both ``text`` and the page's base URL are encoded with
     ``htmlpage.encoding`` before being joined with ``urljoin``. The joined
     value goes through ``unquote_markup`` (using the same encoding) and
     then ``safe_download_url``, whose result is returned.
     """
     encoded_link = text.encode(htmlpage.encoding)
     encoded_base = get_base_url(htmlpage).encode(htmlpage.encoding)
     absolute = urljoin(encoded_base, encoded_link)
     unquoted = unquote_markup(absolute, encoding=htmlpage.encoding)
     return safe_download_url(unquoted)
Esempio n. 6
0
 def adapt(self, text, htmlpage=None):
     """Join ``text`` onto the page's base URL and return a safe URL.

     With no ``htmlpage`` there is nothing to resolve against, so ``text``
     is returned as given. Otherwise ``text`` and the page's base URL are
     encoded with ``htmlpage.encoding``, joined with ``urljoin``, passed
     through ``unquote_markup`` (same encoding) and finally through
     ``safe_download_url``, whose result is returned.
     """
     if htmlpage is None:
         return text

     encoded_link = text.encode(htmlpage.encoding)
     encoded_base = get_base_url(htmlpage).encode(htmlpage.encoding)
     absolute = urljoin(encoded_base, encoded_link)
     unquoted = unquote_markup(absolute, encoding=htmlpage.encoding)
     return safe_download_url(unquoted)