def test_safe_download_url(self):
    """Check how safe_download_url handles ``..`` segments in the URL path.

    Each case pairs an input URL with the URL the test expects back.
    """
    expectations = (
        # Parent references at or above the root are expected to resolve to the root.
        ('http://www.example.org/../', 'http://www.example.org/'),
        ('http://www.example.org/../../images/../image', 'http://www.example.org/image'),
        # A URL without dot segments is expected back unchanged.
        ('http://www.example.org/dir/', 'http://www.example.org/dir/'),
    )
    for given, wanted in expectations:
        self.assertEqual(safe_download_url(given), wanted)
def test_safe_download_url(self):
    """Verify the URLs safe_download_url returns for paths with ``..`` segments."""

    def check(url, expected):
        # Single comparison point so every case reads as "input -> expected".
        self.assertEqual(safe_download_url(url), expected)

    # ``..`` at the root is expected to collapse to the root path.
    check('http://www.example.org/../', 'http://www.example.org/')
    # Mixed ``..`` segments are expected to resolve to the remaining path.
    check('http://www.example.org/../../images/../image', 'http://www.example.org/image')
    # A path without dot segments is expected back unchanged.
    check('http://www.example.org/dir/', 'http://www.example.org/dir/')
def parse_video_page(self, r):
    """Yield a single item describing the video found in response ``r``.

    The item contains:

    * ``page_url`` -- ``r.url``
    * ``title`` -- first text match of ``article > h1``
    * ``videos`` -- one ``{'src', 'type'}`` dict per ``video > source`` element
    * ``video_thumbnail`` -- the ``poster`` attribute of the ``video`` element

    Source and poster URLs are resolved with ``r.urljoin`` and then passed
    through ``safe_download_url``.
    """
    page_url = r.url
    title = r.css('article > h1::text').get()

    videos = []
    for source in r.css('video > source'):
        absolute_src = r.urljoin(source.xpath('@src').get())
        entry = {'src': safe_download_url(absolute_src)}
        entry['type'] = source.xpath('@type').get()
        videos.append(entry)

    # NOTE(review): presumably .get() yields None when the video has no
    # poster attribute -- confirm what r.urljoin does with that value.
    poster = r.css('video::attr(poster)').get()
    thumbnail = safe_download_url(r.urljoin(poster))

    yield {
        'page_url': page_url,
        'title': title,
        'videos': videos,
        'video_thumbnail': thumbnail,
    }
def test_safe_download_url(self):
    """Check safe_download_url on plain URLs and on byte URLs with explicit encodings.

    The first table calls the function with defaults only; the second passes
    ``encoding`` and ``path_encoding`` and checks the percent-escapes expected
    in the path and in the query string.
    """
    plain_cases = [
        # A bare host is expected to gain a trailing slash.
        ('http://www.example.org', 'http://www.example.org/'),
        ('http://www.example.org/../', 'http://www.example.org/'),
        ('http://www.example.org/../../images/../image', 'http://www.example.org/image'),
        ('http://www.example.org/dir/', 'http://www.example.org/dir/'),
        # Bytes input is expected to produce the same text result.
        (b'http://www.example.org/dir/', 'http://www.example.org/dir/'),
    ]
    for raw, expected in plain_cases:
        self.assertEqual(safe_download_url(raw), expected)

    # Encoding related tests
    encoded_cases = [
        # (input bytes, encoding, path_encoding, expected URL)
        (b'http://www.example.org?\xa3', 'latin-1', 'latin-1',
         'http://www.example.org/?%A3'),
        (b'http://www.example.org?\xc2\xa3', 'utf-8', 'utf-8',
         'http://www.example.org/?%C2%A3'),
        (b'http://www.example.org/\xc2\xa3?\xc2\xa3', 'utf-8', 'latin-1',
         'http://www.example.org/%A3?%C2%A3'),
    ]
    for raw, enc, path_enc, expected in encoded_cases:
        self.assertEqual(
            safe_download_url(raw, encoding=enc, path_encoding=path_enc),
            expected)
def test_safe_download_url(self):
    """Verify safe_download_url results for plain URLs and for encoded byte URLs."""

    def expect(url, result, **options):
        # Forward any encoding options untouched to the function under test.
        self.assertEqual(safe_download_url(url, **options), result)

    expect("http://www.example.org", "http://www.example.org/")
    expect("http://www.example.org/../", "http://www.example.org/")
    expect(
        "http://www.example.org/../../images/../image",
        "http://www.example.org/image",
    )
    expect("http://www.example.org/dir/", "http://www.example.org/dir/")
    # Same URL supplied as bytes; a text result is expected.
    expect(b"http://www.example.org/dir/", "http://www.example.org/dir/")

    # Encoding related tests
    expect(
        b"http://www.example.org?\xa3",
        "http://www.example.org/?%A3",
        encoding="latin-1",
        path_encoding="latin-1",
    )
    expect(
        b"http://www.example.org?\xc2\xa3",
        "http://www.example.org/?%C2%A3",
        encoding="utf-8",
        path_encoding="utf-8",
    )
    # Path and query use different encodings here, so their escapes differ.
    expect(
        b"http://www.example.org/\xc2\xa3?\xc2\xa3",
        "http://www.example.org/%A3?%C2%A3",
        encoding="utf-8",
        path_encoding="latin-1",
    )
def url(self):
    """Return the stored URL in sanitized, canonical form.

    Returns ``None`` when ``self._url`` is ``None``; otherwise the value is
    passed through ``urltool.safe_download_url`` and then
    ``urltool.canonicalize_url`` with ``encoding='utf-8'``.
    """
    if self._url is not None:
        sanitized = urltool.safe_download_url(self._url)
        return urltool.canonicalize_url(sanitized, encoding='utf-8')
    return None
def _set_url(self, url):
    """Store ``url`` on ``self.url`` after sanitizing and canonicalizing it.

    The URL is first passed through ``urltool.safe_download_url``; the result
    is canonicalized using the instance's ``encoding``.
    """
    sanitized = urltool.safe_download_url(url)
    self.url = urltool.canonicalize_url(sanitized, encoding=self.encoding)
def url(self, url):
    """Validate ``url`` and store its sanitized form on the instance.

    Raises:
        ValueError: if ``urltool.is_url`` rejects ``url``.
    """
    # Per the original note, only URLs starting with the file, http or https
    # scheme count as valid (NOTE(review): confirm against urltool.is_url).
    if not urltool.is_url(url):
        raise ValueError('Not a valid url for Request.')

    sanitized = urltool.safe_download_url(url)
    self.__url = sanitized
def _tran(url):
    """Return ``url`` sanitized by ``urltool.safe_download_url`` and then
    canonicalized by ``urltool.canonicalize_url`` with ``encoding='utf-8'``.
    """
    sanitized = urltool.safe_download_url(url)
    return urltool.canonicalize_url(sanitized, encoding='utf-8')