def setUp(self):
    """Create an unparsed ``WebPage`` and the URL/path fixtures around it.

    Sets ``self.p`` (the page), ``self.url``, ``self.path`` (a freshly
    created temporary directory), ``self.source`` (an in-memory stream
    built from the module-level ``html``), ``self.file_name`` and
    ``self.utx`` (a ``URLTransformer`` built from those values).
    """
    import shutil

    self.p = wp.WebPage()
    self.url = 'http://test-site.com'
    self.path = tempfile.mkdtemp()
    # mkdtemp() never deletes what it creates, so register the removal
    # immediately; it then runs even if a later step or the test fails.
    # NOTE(review): relies on unittest.TestCase.addCleanup -- confirm the
    # enclosing class derives from TestCase.
    self.addCleanup(shutil.rmtree, self.path, ignore_errors=True)
    self.source = six.StringIO(html)
    self.file_name = 'index.html'
    self.utx = urls.URLTransformer(
        self.url, self.url, self.path, self.file_name)
def setUp(self):
    """Create a ``WebPage`` that has already parsed the ``html_2`` fixture.

    Sets ``self.w`` (the page), ``self.url``, ``self.path`` (a freshly
    created temporary directory), ``self.source`` (an in-memory stream
    built from the module-level ``html_2``), ``self.file_name`` and
    ``self.utx``. The source and transformer are attached to the page and
    ``parse()`` is called before the method returns.
    """
    import shutil

    self.w = wp.WebPage()
    self.url = 'http://webpage2.com'
    self.path = str(tempfile.mkdtemp())
    # mkdtemp() never deletes what it creates. Registering the removal
    # before parse() means it still runs when parsing fails in setUp.
    # NOTE(review): relies on unittest.TestCase.addCleanup -- confirm the
    # enclosing class derives from TestCase.
    self.addCleanup(shutil.rmtree, self.path, ignore_errors=True)
    self.source = six.StringIO(html_2)
    self.file_name = 'index.html'
    self.utx = urls.URLTransformer(
        self.url, self.url, self.path, self.file_name)

    self.w.set_source(self.source)
    self.w.utx = self.utx
    self.w.parse()
def setUp(self):
    """Create a ``Crawler`` wired to the ``html`` fixture, without parsing.

    Sets ``self.w`` (the crawler), ``self.url``, ``self.path`` (a freshly
    created temporary directory), ``self.source`` (an in-memory stream
    built from the module-level ``html``), ``self.file_name`` and
    ``self.utx``. The source and transformer are attached to the crawler;
    ``parse()`` is not called here.
    """
    import shutil

    self.w = cr.Crawler()
    self.url = 'http://webpage2.com'
    self.path = tempfile.mkdtemp()
    # mkdtemp() never deletes what it creates, so register the removal
    # immediately; it then runs even if a later step or the test fails.
    # NOTE(review): relies on unittest.TestCase.addCleanup -- confirm the
    # enclosing class derives from TestCase.
    self.addCleanup(shutil.rmtree, self.path, ignore_errors=True)
    self.source = six.StringIO(html)
    self.file_name = 'index.html'
    self.utx = urls.URLTransformer(
        self.url, self.url, self.path, self.file_name)

    # NOTE(review): a reset() call was disabled in the original; the reason
    # is not visible from this method.
    # self.reset()
    self.w.set_source(self.source)
    self.w.utx = self.utx
def test_url_parsing(self):
    """Check the attributes a ``URLTransformer`` exposes for an absolute URL.

    The transformer is built from a URL carrying a port and a fragment,
    with no base URL or base path assigned.
    """
    raw_url = 'http://some-site.com:80/path/#frag?query'

    transformer = urls.URLTransformer(raw_url)
    transformer.default_filename = 'index.html'
    transformer._unique_fn_required = False

    # Both the original and the resolved URL are expected to equal the input.
    self.assertEqual(transformer.original_url, raw_url)
    self.assertEqual(transformer.url, raw_url)

    # The parsed form is compared against the URL without its fragment part.
    self.assertEqual(
        transformer.parsed_url,
        urlparse.urlsplit('http://some-site.com:80/path/'))
    self.assertEqual(transformer.parsed_url.port, 80)
    self.assertEqual(transformer.hostname, 'some-site.com')
    self.assertEqual(transformer.url_path, '/path/')
    self.assertEqual(transformer.file_name, 'index.html')

    # Expected filesystem locations are built with url2pathname.
    expected_dir = url2pathname('some-site.com/path/')
    self.assertEqual(transformer.to_path, expected_dir)
    expected_file = url2pathname('some-site.com/path/index.html')
    self.assertEqual(transformer.file_path, expected_file)
def test_url_parsing_after_set_base(self):
    """Check a relative URL after ``base_url`` and ``base_path`` are assigned.

    The transformer is created from a relative URL only; the base URL and
    the base path are set afterwards as attributes.
    """
    relative_url = '../some/rel/path/'
    resolved_url = 'http://some-site.com:80/some/rel/path/'

    transformer = urls.URLTransformer(relative_url)
    transformer.base_url = "http://some-site.com:80"
    transformer.base_path = "e:\\tests\\"
    transformer.default_filename = 'index.html'
    transformer._unique_fn_required = False

    # The original stays relative; the resolved URL is joined to the base.
    self.assertEqual(transformer.original_url, relative_url)
    self.assertEqual(transformer.url, resolved_url)

    expected_split = urlparse.urlsplit(resolved_url, allow_fragments=False)
    self.assertEqual(transformer.parsed_url, expected_split)
    self.assertEqual(transformer.parsed_url.port, 80)
    self.assertEqual(transformer.hostname, 'some-site.com')
    self.assertEqual(transformer.url_path, '/some/rel/path/')
    self.assertEqual(transformer.file_name, 'index.html')

    # NOTE(review): the expected paths use Windows separators and a drive
    # letter -- confirm this test is meant to pass on other platforms.
    self.assertEqual(
        transformer.to_path,
        'e:\\tests\\some-site.com\\some\\rel\\path\\')
    expected_file = url2pathname(
        'e://tests/some-site.com/some/rel/path/index.html').lower()
    self.assertEqual(transformer.file_path, expected_file)
def test_parsing_source_checks(self):
    """Parse an empty source; the test passes as long as nothing raises."""
    empty_source = six.StringIO()
    self.p.set_source(empty_source)

    # The transformer is built without an explicit file name here.
    self.p.utx = urls.URLTransformer(self.url, self.url, self.path)
    self.p.parse()