def __parse_urls(self, body):
    """Parses out URLs from the body.

    Returns a tuple of lists of URI and Domain objects, respectively:
    (url_list, domain_list). URLs are matched with URL_PATTERN; each
    URL's registered domain is derived via whois.extract_domain().
    """
    unique_urls = set()
    unique_domains = set()
    if self.__verbose_output:
        sys.stderr.write("** parsing urls from email body\n")
    # Collect the distinct URLs and the distinct domains they belong to.
    for match in URL_PATTERN.findall(body):
        url = match[0]
        unique_urls.add(url)
        domain = whois.extract_domain(url)
        unique_domains.add(domain)
    # Mapping of domain names to the objects that reference them
    domain_map = {}
    # List of Domain and related (DNS, Whois) objects to be returned
    domain_list = []
    # List of URI objects to be returned
    url_list = []
    for domain in unique_domains:
        domain_objs = self.__create_domain_objs(domain)
        # Save all the related objects in the correct order in the list.
        domain_list.extend(self.__reorder_domain_objs(domain_objs))
        # Save this domain object for linking to URLs
        domain_map[domain] = domain_objs['URI']
    for u in unique_urls:
        if self.include_url_objects:
            url = self.__create_url_object(u)
            # Retrieve the Domain from the dictionary
            domain = domain_map[whois.extract_domain(u)]
            # Add relationships between the URL and the domain
            domain.add_related(url, 'Extracted_From', inline=False)
            domain.add_related(url, 'FQDN_Of', inline=False)
            url.add_related(domain, 'Contains', inline=False)
            url_list.append(url)
    return (url_list, domain_list)
def backcookie(self, command, host, cookie, vcmd): headers = { "User-Agent": "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1", "Cookie": cookie + "=" + command.encode(self.eb) } try: r = requests.get(host, headers=headers) validate = r.headers.values() self.url = str(r.url) self.url = extract_domain(self.url) except: self.Error() if validate[0] == "0" or vcmd == "command": print color["blue"] + r.text.strip() + color["white"] else: self.Error()
def backcookie(self, command, host, cookie, vcmd): headers = { "User-Agent": "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1", "Cookie": cookie + "=" + command.encode(self.eb), } try: r = requests.get(host, headers=headers) validate = r.headers.values() self.url = str(r.url) self.url = extract_domain(self.url) except: self.Error() if validate[0] == "0" or vcmd == "command": print color["blue"] + r.text.strip() + color["white"] else: self.Error()
def get_domain_expiration_date(url):
    """Return the WHOIS expiration date of the registered domain of *url*."""
    registered = whois.extract_domain(url)
    record = whois.whois(registered)
    return record.expiration_date
def test_unicode_domain_and_tld(self):
    """A domain whose name and TLD are both non-ASCII is extracted intact."""
    expected = 'россия.рф'
    self.assertEqual(expected, extract_domain('http://россия.рф/'))
def test_simple_unicode_domain(self):
    """A non-ASCII second-level name with an ASCII TLD is extracted intact."""
    expected = 'нарояци.com'
    self.assertEqual(expected, extract_domain('http://нарояци.com/'))
def test_ascii_with_schema_path_and_query(self):
    """Scheme, subdomain, path and query string are all stripped away."""
    full_url = 'https://www.google.com/search?q=why+is+domain+whois+such+a+mess'
    self.assertEqual('google.com', extract_domain(full_url))
def test_simple_ascii_domain(self):
    """A bare registered domain passes through unchanged."""
    bare = 'google.com'
    self.assertEqual(bare, extract_domain(bare))
def test_second_level_domain(self):
    """Verify that TLDs which only have second-level domains parse correctly."""
    second_level = 'google.co.za'
    self.assertEqual(second_level, extract_domain(second_level))
def test_ipv4(self):
    """Verify that ipv4 addresses work."""
    address = '172.217.3.110'
    # Apply extract_domain() twice so reverse-DNS hostnames that may vary
    # (e.g. lga34s18-in-f14.1e100.net) collapse to the stable registered domain.
    resolved = extract_domain(extract_domain(address))
    self.assertEqual('1e100.net', resolved)
def test_ipv6(self):
    """Verify that ipv6 addresses work."""
    address = '2607:f8b0:4006:802::200e'
    # Apply extract_domain() twice so reverse-DNS hostnames that may vary
    # (e.g. lga34s12-in-x0e.1e100.net) collapse to the stable registered domain.
    resolved = extract_domain(extract_domain(address))
    self.assertEqual('1e100.net', resolved)
def _check_and_normalize_host(self, host):
    """Validate that *host* contains a registrable (dotted) domain.

    Extracts the registered domain with whois.extract_domain() and raises
    ApiException(435) when it has no dot at all. Returns the original
    *host* unchanged on success (despite the name, no normalization is
    applied to the returned value).
    """
    extracted = whois.extract_domain(host)
    # A single dotless label (e.g. "localhost") is not an acceptable host.
    if len(extracted.split('.')) == 1:
        # Bug fix: corrected the misspelled error message ('Inlavid').
        raise ApiException(435, 'Invalid host format')
    return host
def extract_domain(url: str) -> str:
    """Accept a URL, IPv4 or IPv6 address; validate it and return its domain.

    Thin wrapper that delegates to whois.extract_domain().
    """
    return whois.extract_domain(url)