Beispiel #1
0
 def setUp(self):
     """Create a fresh WebPage and the URL/path fixtures used by each test.

     A new temporary directory is created for every test and registered
     for removal, so test runs do not leave directories behind.
     """
     import shutil  # local import: only needed for the temp-dir cleanup

     self.p = wp.WebPage()
     self.url = 'http://test-site.com'
     self.path = tempfile.mkdtemp()
     # mkdtemp() never deletes what it creates; remove it once the test is
     # done. ignore_errors keeps this harmless if it was already removed.
     self.addCleanup(shutil.rmtree, self.path, ignore_errors=True)
     # In-memory stand-in for a page source, built from the `html` fixture.
     self.source = six.StringIO(html)
     self.file_name = 'index.html'
     # The same URL is passed for the first two positional arguments.
     self.utx = urls.URLTransformer(self.url, self.url, self.path, self.file_name)
Beispiel #2
0
 def setUp(self):
     """Create a WebPage, attach a source and URLTransformer, and parse it.

     A new temporary directory is created for every test and registered
     for removal, so test runs do not leave directories behind.
     """
     import shutil  # local import: only needed for the temp-dir cleanup

     self.w = wp.WebPage()
     self.url = 'http://webpage2.com'
     self.path = str(tempfile.mkdtemp())
     # mkdtemp() never deletes what it creates; remove it once the test is
     # done. ignore_errors keeps this harmless if it was already removed.
     self.addCleanup(shutil.rmtree, self.path, ignore_errors=True)
     # In-memory stand-in for a page source, built from the `html_2` fixture.
     self.source = six.StringIO(html_2)
     self.file_name = 'index.html'
     # The same URL is passed for the first two positional arguments.
     self.utx = urls.URLTransformer(self.url, self.url, self.path, self.file_name)
     # Wire the source and transformer into the page before parsing it.
     self.w.set_source(self.source)
     self.w.utx = self.utx
     self.w.parse()
Beispiel #3
0
    def setUp(self):
        """Create a Crawler and attach an in-memory source and URLTransformer.

        A new temporary directory is created for every test and registered
        for removal, so test runs do not leave directories behind.
        """
        import shutil  # local import: only needed for the temp-dir cleanup

        self.w = cr.Crawler()
        self.url = 'http://webpage2.com'
        self.path = tempfile.mkdtemp()
        # mkdtemp() never deletes what it creates; remove it once the test
        # is done. ignore_errors keeps this harmless if already removed.
        self.addCleanup(shutil.rmtree, self.path, ignore_errors=True)
        # In-memory stand-in for a page source, built from the `html` fixture.
        self.source = six.StringIO(html)
        self.file_name = 'index.html'
        # The same URL is passed for the first two positional arguments.
        self.utx = urls.URLTransformer(self.url, self.url, self.path, self.file_name)

        # NOTE(review): a `self.reset()` call used to sit here, commented
        # out -- confirm it is not needed before the source is attached.
        self.w.set_source(self.source)
        self.w.utx = self.utx
Beispiel #4
0
 def test_url_parsing(self):
     """Check the attributes URLTransformer derives from an absolute URL.

     The input carries a ``#frag?query`` suffix: ``original_url`` and
     ``url`` are expected to keep it, while ``parsed_url`` is expected to
     equal the split of the URL without it.
     """
     raw_url = 'http://some-site.com:80/path/#frag?query'

     obj = urls.URLTransformer(raw_url)
     obj.default_filename = 'index.html'
     obj._unique_fn_required = False

     # The URL itself is reported unchanged.
     self.assertEqual(obj.original_url, raw_url)
     self.assertEqual(obj.url, raw_url)

     # Parsed components.
     expected_split = urlparse.urlsplit('http://some-site.com:80/path/')
     self.assertEqual(obj.parsed_url, expected_split)
     self.assertEqual(obj.parsed_url.port, 80)
     self.assertEqual(obj.hostname, 'some-site.com')
     self.assertEqual(obj.url_path, '/path/')

     # File-system mapping.
     self.assertEqual(obj.file_name, 'index.html')
     expected_dir = url2pathname('some-site.com/path/')
     self.assertEqual(obj.to_path, expected_dir)
     expected_file = url2pathname('some-site.com/path/index.html')
     self.assertEqual(obj.file_path, expected_file)
Beispiel #5
0
 def test_url_parsing_after_set_base(self):
     """Check the attributes of a relative URL once base URL and path are set.

     NOTE(review): the expected paths use a Windows-style drive and
     separators -- confirm whether this test is meant to pass on other
     platforms.
     """
     relative_url = '../some/rel/path/'
     absolute_url = 'http://some-site.com:80/some/rel/path/'

     obj = urls.URLTransformer(relative_url)
     obj.base_url = "http://some-site.com:80"
     obj.base_path = "e:\\tests\\"
     obj.default_filename = 'index.html'
     obj._unique_fn_required = False

     # The original stays relative; the resolved URL is absolute.
     self.assertEqual(obj.original_url, relative_url)
     self.assertEqual(obj.url, absolute_url)

     # Parsed components of the resolved URL.
     expected_split = urlparse.urlsplit(absolute_url, allow_fragments=False)
     self.assertEqual(obj.parsed_url, expected_split)
     self.assertEqual(obj.parsed_url.port, 80)
     self.assertEqual(obj.hostname, 'some-site.com')
     self.assertEqual(obj.url_path, '/some/rel/path/')

     # File-system mapping under the base path.
     self.assertEqual(obj.file_name, 'index.html')
     expected_dir = 'e:\\tests\\some-site.com\\some\\rel\\path\\'
     self.assertEqual(obj.to_path, expected_dir)
     expected_file = url2pathname(
         'e://tests/some-site.com/some/rel/path/index.html')
     self.assertEqual(obj.file_path, expected_file.lower())
Beispiel #6
0
 def test_parsing_source_checks(self):
     """Smoke test: parsing an empty source should finish without raising."""
     page = self.p
     empty_source = six.StringIO()
     page.set_source(empty_source)
     # No file name is passed here; only the URL (twice) and the path.
     page.utx = urls.URLTransformer(self.url, self.url, self.path)
     page.parse()