def test_parse_domain(self):
    """Check the helper that hex-decodes and validates a domain.

    The helper is expected to hand back the valid domain, or None.
    """
    # Inputs that must never yield a domain, paired with the failure
    # message to report for each of them.
    rejected_inputs = (
        ('', 'Missing hex data should return None'),
        (None, 'Non-hex-decodable data should return None'),
        ('he378a -- ?', 'Non-hex-decodable data should return None'),
    )
    for encoded, failure_message in rejected_inputs:
        self.assertIs(None, tools.parse_domain(encoded), failure_message)

    # Domains that decode cleanly from hex but fail validation.
    invalid = '\\www.z.comasfff'
    self.assertFalse(
        dnstwist.is_valid_domain(invalid), 'Bad domain should be invalid')

    overlong = 'www.zsssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssszssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss.com'
    self.assertFalse(
        dnstwist.is_valid_domain(overlong), 'Long domain should be invalid')

    invalid_hex = binascii.hexlify(invalid)
    self.assertIs(
        None,
        tools.parse_domain(invalid_hex),
        'hex-decodable (but invalid) domain data should return None')

    # A well-formed domain should survive the hex round trip.
    valid = 'www.example.com'
    self.assertTrue(
        dnstwist.is_valid_domain(valid), 'Good domain should be valid')

    valid_hex = binascii.hexlify(valid)
    self.assertEqual(
        'www.example.com',
        tools.parse_domain(valid_hex),
        'hex-decodable valid domain data should be returned')
def suggest_domain(search_domain):
    """Suggest a domain based on the search fields.

    Args:
        search_domain: Free-text search string. It is split on single
            space characters into search terms.

    Returns:
        A domain string accepted by ``dnstwist.is_valid_domain``, or None
        when no valid suggestion could be built. When several generated
        candidates are valid, one of them is picked at random.
    """
    search_terms = search_domain.split(' ')

    # Check for a simple common typo first - putting comma instead of period
    # in-between the second- and top-level domains. Note that '/' and '-'
    # are rewritten to '.' here as well.
    if len(search_terms) == 1:
        candidate = re.sub(r'[,/-]', '.', search_terms[0])
        if dnstwist.is_valid_domain(candidate):
            return candidate

    # Pick up space-separated domain levels.
    if len(search_terms) == 2 and search_terms[1] in tld_db.TLDS:
        candidate = '.'.join(search_terms)
        if dnstwist.is_valid_domain(candidate):
            return candidate

    # Filter out a ton of garbage being submitted.
    if len(search_terms) > 2:
        return None

    # Attempt to make a domain from the terms.
    joiners = ('', '-')  # for now, also trialling ('', '-', '.')
    tlds = ('com', )  # for now

    # Filter out long words.
    search_terms = [term for term in search_terms if len(term) < 30]

    # Filter out silly characters.
    search_terms = [
        re.sub(r'[^a-zA-Z0-9\-]', '', term) for term in search_terms
    ]

    # Join the terms with each joiner, then add each TLD.
    suggestions = [joiner.join(search_terms) for joiner in joiners]
    suggested_domains = [
        '{}.{}'.format(suggestion.lower(), tld)
        for tld in tlds
        for suggestion in suggestions
    ]

    # Drop out duplicates.
    suggested_domains = list(set(suggested_domains))

    # Keep only the candidates that are actually valid domains. This is built
    # as a real list (not a bare filter() result) so that the emptiness check
    # and random.choice() also work where filter() returns an iterator.
    valid_suggestions = [
        domain for domain in suggested_domains
        if dnstwist.is_valid_domain(domain)
    ]
    if not valid_suggestions:
        return None

    return random.choice(valid_suggestions)
def _base64_redirect(encoded_domain): """Try to parse a domain into base64, return a redirect to the hex version if successful, otherwise None. """ try: decoded_domain = base64.b64decode(encoded_domain) if dnstwist.is_valid_domain(decoded_domain): return '/atom/{}'.format(tools.encode_domain(decoded_domain)) except: pass
def note(self, domain):
    """Register that a domain has shown up in a delta report.

    Every call bumps the counter stored under the domain and resets its
    expiry to EXPIRY, so the counter reflects a sliding window of recent
    changes. Invalid domains are ignored.
    """
    if not dnstwist.is_valid_domain(domain):
        return

    # Queue both commands on one pipeline and send them together.
    batch = self.r_conn.pipeline()
    batch.incr(domain)
    batch.expire(domain, EXPIRY)
    batch.execute()
def parse_domain(encoded_domain):
    """Decode a hex-encoded domain and return it if it is valid.

    Args:
        encoded_domain: The (supposedly) hex-encoded domain string.

    Returns:
        The decoded domain, lower-cased, when ``decode_domain`` succeeds and
        ``dnstwist.is_valid_domain`` accepts the result; None on un-decodable
        input or an invalid domain.
    """
    try:
        decoded_domain = decode_domain(encoded_domain)
        if dnstwist.is_valid_domain(decoded_domain):
            return decoded_domain.lower()
    except Exception:
        # Deliberately best-effort: any decoding/validation failure is
        # reported as "no domain". Catching Exception (rather than a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        pass

    return None
def get_delta_domains():
    """Fetch every valid domain listed across the delta reports.

    The report endpoint is read from the DELTAS_URL environment variable
    and queried with a 10 second timeout. Each entry under the 'values'
    key of the JSON response is unpacked as a single-element sequence
    holding a domain; entries failing validation are dropped.

    Raises an Exception when DELTAS_URL is not configured.

    If this stops scaling, an iterator over a DB query is the planned
    replacement.
    """
    deltas_url = os.getenv('DELTAS_URL')
    if deltas_url is None:
        raise Exception('Delta report URL configuration not set!')

    response = requests.get(deltas_url, timeout=10)
    payload = response.json()

    valid_domains = []
    for (candidate, ) in payload['values']:
        if dnstwist.is_valid_domain(candidate):
            valid_domains.append(candidate)
    return valid_domains
def resolve(domain):
    """Resolve a domain to an IP address.

    Args:
        domain: The domain to resolve. It is validated with
            ``dnstwist.is_valid_domain`` and IDNA-encoded before lookup.

    Returns:
        A 2-tuple ``(ip, error)``:

        * ``(ip_addr, False)`` on successful resolution.
        * ``(False, False)`` when the lookup ran but the domain does not
          resolve (``socket.gaierror`` from the fallback lookup).
        * ``(False, True)`` when the domain is invalid, when the lookup
          attempt itself failed, or when the only answer obtained was
          127.0.0.1.

    NOTE(review): the previous docstring said results are "cached to 1
    hour"; no caching is visible in this function - confirm whether a
    decorator applies it.
    """
    if not dnstwist.is_valid_domain(domain):
        return False, True

    idna_domain = domain.encode('idna')

    # Try for an 'A' record.
    try:
        ip_addr = str(sorted(RESOLVER.query(idna_domain, 'A'))[0].address)

        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        # Any failure here just means we fall through to the simple
        # resolution below. Catching Exception (rather than a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        pass

    # Try for a simple resolution if the 'A' record request failed.
    try:
        ip_addr = socket.gethostbyname(idna_domain)

        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except socket.gaierror:
        # Indicates failure to resolve to IP address, not an error in
        # the attempt.
        return False, False
    except Exception:
        pass

    # Error due to exception or the 127.0.0.1 issue.
    return False, True