def test_finds_query(self):
    """Query-string parameters are extracted for every URL shape."""
    result = url_parser.parse_url('http://mysubdomain.example.com?myquery=test')
    self.assertEqual(result['query']['myquery'], 'test')

    # Mixed parameters, including a bare key ('test') with no value.
    result = url_parser.parse_url('http://mysubdomain.example.com?myquery=test&one=two&test')
    self.assertEqual(result['query']['myquery'], 'test')
    self.assertEqual(result['query']['one'], 'two')
    self.assertIsNone(result['query']['test'])

    # The query survives a file name, a nested path and a trailing slash.
    for url in (
        'http://mysubdomain.example.com/file.js?myquery=test&one=two',
        'http://mysubdomain.example.com/path/and/file.js?myquery=test&one=two',
        'http://mysubdomain.example.com/path/?myquery=test&one=two',
    ):
        query = url_parser.parse_url(url)['query']
        self.assertEqual(query['myquery'], 'test')
        self.assertEqual(query['one'], 'two')
def test_finds_top_domain(self):
    """The top-level domain is isolated, including compound TLDs such as co.uk."""
    for url, expected_tld in (
        ('http://mysubdomain.example.com', 'com'),
        ('http://mysubdomain.example.co.uk', 'co.uk'),
    ):
        self.assertEqual(url_parser.parse_url(url)['top_domain'], expected_tld)
def test_finds_file(self):
    """The trailing file name is extracted at any path depth."""
    for url, expected_file in (
        ('http://mysubdomain.example.com/cool.jpg', 'cool.jpg'),
        ('http://mysubdomain.example.com/directory/here/sample.mp4', 'sample.mp4'),
    ):
        self.assertEqual(url_parser.parse_url(url)['file'], expected_file)
def test_finds_dir(self):
    """The directory part is found with and without a trailing file."""
    for url, expected_dir in (
        ('http://mysubdomain.example.com/folder/', '/folder/'),
        ('http://mysubdomain.example.com/multiple/folders/', '/multiple/folders/'),
        ('http://mysubdomain.example.com/multiple/folders/with_a_file.js', '/multiple/folders/'),
    ):
        self.assertEqual(url_parser.parse_url(url)['dir'], expected_dir)
def test_finds_path(self):
    """The full path (directories plus any file) is returned verbatim."""
    for url, expected_path in (
        ('http://mysubdomain.example.com/path', '/path'),
        ('http://mysubdomain.example.com/this/is/the/path', '/this/is/the/path'),
        ('http://mysubdomain.example.com/path/with/file.js', '/path/with/file.js'),
    ):
        self.assertEqual(url_parser.parse_url(url)['path'], expected_path)
def test_finds_protocol(self):
    """The scheme is recognised for http, https and ftp URLs."""
    for url, expected_protocol in (
        ('http://mysubdomain.example.com', 'http'),
        ('https://mysubdomain.example.com', 'https'),
        ('ftp://mysubdomain.example.com', 'ftp'),
    ):
        self.assertEqual(url_parser.parse_url(url)['protocol'], expected_protocol)
def test_domain_that_includes_a_top_domain_in_query(self):
    """A domain-looking query value must not confuse host splitting."""
    parsed = url_parser.parse_url('http://test.com.hello.nogo.no?my_query_domain=www.test.com')
    for field, expected in (
        ('top_domain', 'no'),
        ('domain', 'nogo'),
        ('sub_domain', 'test.com.hello'),
    ):
        self.assertEqual(parsed[field], expected)
    # The domain-shaped value stays intact inside the query dict.
    self.assertEqual(parsed['query']['my_query_domain'], 'www.test.com')
def test_domain_that_starts_with_same_letters_as_top_domain(self):
    """A subdomain containing the TLD's letters must not shift the split."""
    parsed = url_parser.parse_url('http://domains-stars-with-same-top-domain.nogo.no/')
    for field, expected in (
        ('top_domain', 'no'),
        ('domain', 'nogo'),
        ('sub_domain', 'domains-stars-with-same-top-domain'),
    ):
        self.assertEqual(parsed[field], expected)
def test_catastrophic_backtracking(self):
    """A long, underscore/hyphen-heavy subdomain still parses (regex must not blow up)."""
    parsed = url_parser.parse_url(
        'http://very_long-and-complixated_subdomaind-for-page.mywebpageishere.com/'
    )
    for field, expected in (
        ('top_domain', 'com'),
        ('domain', 'mywebpageishere'),
        ('sub_domain', 'very_long-and-complixated_subdomaind-for-page'),
    ):
        self.assertEqual(parsed[field], expected)
def getVideoId(self, url):
    """Return the video id, i.e. the 'v' query parameter, of *url*.

    Raises excepCust.Invalid_Url (chained to the underlying KeyError)
    when the URL has no 'v' parameter.
    """
    url_data = url_parser.parse_url(url)
    # NOTE(review): removed a leftover debug print of url_data that
    # polluted stdout on every call.
    try:
        return url_data['query']['v']
    except KeyError as err:
        # Chain the cause so tracebacks show which key lookup failed.
        raise excepCust.Invalid_Url() from err
def getkey():
    """Follow Apple's sign-in redirect and extract the appIdKey from its path.

    Clears the session cookies afterwards so the redirect can be replayed.
    """
    endpoint = "https://ss.apple.com"
    redirect_url = ses.get(endpoint).url
    parsed = url_parser.parse_url(redirect_url)
    # str() keeps the original coercion even if 'path' is not already a string.
    path_text = str(parsed['path'])
    app_id_key = re.findall("appIdKey=(.*?)&", path_text)[0]
    ses.cookies.clear()
    return app_id_key
def test_finds_fragment(self):
    """The fragment is found regardless of surrounding path, file or query parts."""
    for url in (
        'http://mysubdomain.example.com#my_fragment',
        'http://mysubdomain.example.com/path/#my_fragment',
        'http://mysubdomain.example.com/path/file.js#my_fragment',
        'http://mysubdomain.example.com#my_fragment?myargs=test',
        'http://mysubdomain.example.com/test/path.js#my_fragment?myargs=test',
    ):
        self.assertEqual(url_parser.parse_url(url)['fragment'], 'my_fragment')
def test_removes_extra_dot_from_www(self):
    """A doubled dot after 'www' is stripped from the www component."""
    parsed = url_parser.parse_url('http://www..example.com')
    self.assertNotIn('.', parsed['www'])
from url_parser import parse_url, get_url, get_base_url

# Demonstrate the three entry points against one and the same URL.
sample = "https://open.prospecta.app/my_user_login?user=url-parser&password=H3ll0"

url = parse_url(sample)          # URL sections as a dict
url_object = get_url(sample)     # same data, but as an object
basic_url = get_base_url(sample) # just the main URL

print(url["domain"])      # Outputs -> prospecta
print(url_object.domain)  # Outputs -> prospecta
print(basic_url)          # Outputs -> https://open.prospecta.app
def test_does_not_mistake_file_for_dir(self):
    """An extension-less final segment is not folded into 'dir'."""
    parsed = url_parser.parse_url('http://mysubdomain.example.com/folder/test')
    self.assertEqual(parsed['dir'], '/folder/')
    self.assertNotEqual(parsed['dir'], '/folder/test')
def test_returns_null_if_protocol_is_missing(self):
    """'protocol' is None when the URL carries no scheme."""
    self.assertIsNone(url_parser.parse_url('www.example.com')['protocol'])
def test_domain_that_includes_a_top_domain_in_sub_domain(self):
    """TLD-like labels inside the subdomain must not shift the host split."""
    parsed = url_parser.parse_url('http://test.com.hello.nogo.no/')
    for field, expected in (
        ('top_domain', 'no'),
        ('domain', 'nogo'),
        ('sub_domain', 'test.com.hello'),
    ):
        self.assertEqual(parsed[field], expected)
def test_parses_url_without_www(self):
    """A bare 'example.com' still yields its domain and TLD."""
    parsed = url_parser.parse_url('example.com')
    self.assertEqual(parsed['domain'], 'example')
    self.assertEqual(parsed['top_domain'], 'com')
# implement pip as a subprocess: #subprocess.check_call([sys.executable, '-m', 'pip', 'install', #'url_parser']) #subprocess.check_call([sys.executable, '-m', 'pip', 'install', #'selenium']) from url_parser import parse_url from selenium import webdriver from selenium.webdriver.common.keys import Keys url = 'https://nomdeplume4reb.github.io/portfolio/' #use url_parser library to parse parts of give url parsed = parse_url('https://nomdeplume4reb.github.io/portfolio/') #covert none types to empty string convert_None = lambda i: i or '' #create variables for parts of url tld = parsed['top_domain'] domain = parsed['domain'] + '.' + parsed['top_domain'] #because 'hostname' is not included in url_parser library, I have to sub = '' if parsed['sub_domain'] == None: sub = '' else: sub = '.' hostname = convert_None(
def test_finds_multiple_subdomains(self):
    """Several subdomain labels come back joined by dots."""
    parsed = url_parser.parse_url('my.subdomain.example.com')
    self.assertEqual(parsed['sub_domain'], 'my.subdomain')
def test_returns_null_if_sub_domain_is_missing(self):
    """'sub_domain' is None when the host has no subdomain label."""
    self.assertIsNone(url_parser.parse_url('http://example.com')['sub_domain'])