Example #1
0
 def test_safe_download_url(self):
     # (input URL, expected result) pairs, checked in order. The first two
     # expect '..' path segments to be collapsed; the last expects a plain
     # directory URL to come back unchanged.
     cases = (
         ('http://www.example.org/../',
          'http://www.example.org/'),
         ('http://www.example.org/../../images/../image',
          'http://www.example.org/image'),
         ('http://www.example.org/dir/',
          'http://www.example.org/dir/'),
     )
     for raw_url, expected in cases:
         self.assertEqual(safe_download_url(raw_url), expected)
Example #2
0
 def test_safe_download_url(self):
     # Every URL on the left is run through safe_download_url and must equal
     # the normalized URL on the right.
     expectations = [
         ('http://www.example.org/../', 'http://www.example.org/'),
         ('http://www.example.org/../../images/../image',
          'http://www.example.org/image'),
         ('http://www.example.org/dir/', 'http://www.example.org/dir/'),
     ]
     for given, wanted in expectations:
         actual = safe_download_url(given)
         self.assertEqual(actual, wanted)
Example #3
0
 def parse_video_page(self, r):
     """Yield one item describing the video page held in ``r``.

     The item contains the page URL, the text of the ``article > h1``
     heading, one ``{'src', 'type'}`` entry per ``video > source`` element,
     and the ``poster`` attribute of the ``video`` element as the thumbnail.
     Source and poster URLs are resolved with ``r.urljoin`` and then passed
     through ``safe_download_url``.
     """
     page_url = r.url
     title = r.css('article > h1::text').get()

     # One entry per <source> child of the <video> element.
     videos = []
     for source in r.css('video > source'):
         absolute_src = r.urljoin(source.xpath('@src').get())
         videos.append({
             'src': safe_download_url(absolute_src),
             'type': source.xpath('@type').get(),
         })

     poster = r.css('video::attr(poster)').get()
     thumbnail = safe_download_url(r.urljoin(poster))

     yield {
         'page_url': page_url,
         'title': title,
         'videos': videos,
         'video_thumbnail': thumbnail,
     }
Example #4
0
    def test_safe_download_url(self):
        # Cases using the default encodings: (input URL, expected result).
        # The last entry passes bytes and still expects a str result.
        default_cases = [
            ('http://www.example.org',
             'http://www.example.org/'),
            ('http://www.example.org/../',
             'http://www.example.org/'),
            ('http://www.example.org/../../images/../image',
             'http://www.example.org/image'),
            ('http://www.example.org/dir/',
             'http://www.example.org/dir/'),
            (b'http://www.example.org/dir/',
             'http://www.example.org/dir/'),
        ]
        for raw_url, expected in default_cases:
            self.assertEqual(safe_download_url(raw_url), expected)

        # Encoding related tests:
        # (input bytes, encoding, path_encoding, expected result).
        encoding_cases = [
            (b'http://www.example.org?\xa3', 'latin-1', 'latin-1',
             'http://www.example.org/?%A3'),
            (b'http://www.example.org?\xc2\xa3', 'utf-8', 'utf-8',
             'http://www.example.org/?%C2%A3'),
            (b'http://www.example.org/\xc2\xa3?\xc2\xa3', 'utf-8', 'latin-1',
             'http://www.example.org/%A3?%C2%A3'),
        ]
        for raw_url, encoding, path_encoding, expected in encoding_cases:
            result = safe_download_url(raw_url,
                                       encoding=encoding,
                                       path_encoding=path_encoding)
            self.assertEqual(result, expected)
Example #5
0
    def test_safe_download_url(self):
        # Cases using the default encodings: (input URL, expected result).
        # The bytes input is expected to produce a str result as well.
        default_cases = (
            ("http://www.example.org", "http://www.example.org/"),
            ("http://www.example.org/../", "http://www.example.org/"),
            (
                "http://www.example.org/../../images/../image",
                "http://www.example.org/image",
            ),
            ("http://www.example.org/dir/", "http://www.example.org/dir/"),
            (b"http://www.example.org/dir/", "http://www.example.org/dir/"),
        )
        for raw_url, expected in default_cases:
            self.assertEqual(safe_download_url(raw_url), expected)

        # Encoding related tests: (input bytes, keyword arguments, expected).
        encoding_cases = (
            (
                b"http://www.example.org?\xa3",
                {"encoding": "latin-1", "path_encoding": "latin-1"},
                "http://www.example.org/?%A3",
            ),
            (
                b"http://www.example.org?\xc2\xa3",
                {"encoding": "utf-8", "path_encoding": "utf-8"},
                "http://www.example.org/?%C2%A3",
            ),
            (
                b"http://www.example.org/\xc2\xa3?\xc2\xa3",
                {"encoding": "utf-8", "path_encoding": "latin-1"},
                "http://www.example.org/%A3?%C2%A3",
            ),
        )
        for raw_url, options, expected in encoding_cases:
            self.assertEqual(safe_download_url(raw_url, **options), expected)
Example #6
0
 def url(self):
     """Return ``self._url`` in canonical form, or ``None`` when it is unset.

     A non-``None`` value is passed through ``urltool.safe_download_url``
     and then ``urltool.canonicalize_url`` with UTF-8 encoding.
     """
     raw_url = self._url
     if raw_url is None:
         return None
     safe_url = urltool.safe_download_url(raw_url)
     return urltool.canonicalize_url(safe_url, encoding='utf-8')
Example #7
0
 def _set_url(self, url):
     """Store the sanitized, canonical form of ``url`` on ``self.url``.

     ``url`` goes through ``urltool.safe_download_url`` first; the result
     is canonicalized using ``self.encoding``.
     """
     safe_url = urltool.safe_download_url(url)
     self.url = urltool.canonicalize_url(safe_url, encoding=self.encoding)
Example #8
0
 def url(self, url):
     """Validate ``url`` and store its ``safe_download_url`` form.

     Raises:
         ValueError: if ``urltool.is_url`` rejects ``url``.
     """
     # Per the original note: only URLs whose scheme is file, http or https
     # are meant to be accepted (the check itself lives in urltool.is_url).
     if not urltool.is_url(url):
         raise ValueError('Not a valid url for Request.')
     self.__url = urltool.safe_download_url(url)
Example #9
0
 def _tran(url):
     """Return ``url`` sanitized and canonicalized with UTF-8 encoding."""
     safe_url = urltool.safe_download_url(url)
     return urltool.canonicalize_url(safe_url, encoding='utf-8')