def test_parse_domain(self):
    """Tests of the helper that decodes and validates a domain.

    Function returns a valid domain or None.
    """
    # Inputs that must be rejected outright, with their assertion messages.
    for data, message in (
        ('', 'Missing hex data should return None'),
        (None, 'Non-hex-decodable data should return None'),
        ('he378a -- ?', 'Non-hex-decodable data should return None'),
    ):
        self.assertIs(None, tools.parse_domain(data), message)

    # A hex-decodable payload that is not a valid domain is also rejected.
    bad_domain = '\\www.z.comasfff'
    self.assertFalse(
        dnstwist.validate_domain(bad_domain),
        'Bad domain should be invalid'
    )
    self.assertIs(
        None,
        tools.parse_domain(binascii.hexlify(bad_domain)),
        'hex-decodable (but invalid) domain data should return None'
    )

    # A hex-encoded valid domain round-trips back to the plain domain.
    domain = 'www.example.com'
    self.assertTrue(
        dnstwist.validate_domain(domain),
        'Good domain should be valid'
    )
    self.assertEqual(
        'www.example.com',
        tools.parse_domain(binascii.hexlify(domain)),
        'hex-decodable valid domain data should be returned'
    )
def suggest_domain(search_terms):
    """Suggest a domain based on the search fields.

    :param search_terms: list of already-split search terms.
    :returns: a valid suggested domain string, or None when nothing
        sensible can be built from the terms.
    """
    # Check for a simple common typo first - putting comma instead of period
    # in-between the second- and top-level domains.
    if len(search_terms) == 1:
        candidate = re.sub(r'[,/-]', '.', search_terms[0])
        if dnstwist.validate_domain(candidate):
            return candidate

    # Pick up space-separated domain levels, e.g. ['example', 'com'].
    if len(search_terms) == 2 and search_terms[1] in tld_db.TLDS:
        candidate = '.'.join(search_terms)
        if dnstwist.validate_domain(candidate):
            return candidate

    # Attempt to make a domain from the terms.
    joiners = ('', '-')  # for now, also trialling ('', '-', '.')
    tlds = ('com',)  # for now
    suggestions = []

    # Filter out a ton of garbage being submitted
    if len(search_terms) > 2:
        return

    # Filter out long words
    search_terms = [term for term in search_terms if len(term) < 30]

    # Filter out silly characters
    search_terms = [
        re.sub(r'[^a-zA-Z0-9\-]', '', term)
        for term in search_terms
    ]

    # Join the terms
    for joiner in joiners:
        suggestions.append(joiner.join(search_terms))

    # Add TLDs
    suggested_domains = []
    for tld in tlds:
        suggested_domains.extend(
            ['{}.{}'.format(s.lower(), tld) for s in suggestions])

    # Drop out duplicates
    suggested_domains = list(set(suggested_domains))

    # Filter for those that are actually valid domains. Materialize with
    # list() so the len() check below also works on Python 3, where
    # filter() returns a lazy iterator without a length.
    valid_suggestions = list(filter(
        dnstwist.validate_domain, suggested_domains
    ))

    if len(valid_suggestions) == 0:
        return

    return random.choice(valid_suggestions)
def suggest_domain(search_domain):
    """Suggest a domain based on the search fields.

    :param search_domain: raw space-separated search string.
    :returns: a valid suggested domain string, or None when nothing
        sensible can be built from the terms.
    """
    search_terms = search_domain.split(' ')

    # Check for a simple common typo first - putting comma instead of period
    # in-between the second- and top-level domains.
    if len(search_terms) == 1:
        candidate = re.sub(r'[,/-]', '.', search_terms[0])
        if dnstwist.validate_domain(candidate):
            return candidate

    # Pick up space-separated domain levels, e.g. 'example com'.
    if len(search_terms) == 2 and search_terms[1] in tld_db.TLDS:
        candidate = '.'.join(search_terms)
        if dnstwist.validate_domain(candidate):
            return candidate

    # Attempt to make a domain from the terms.
    joiners = ('', '-')  # for now, also trialling ('', '-', '.')
    tlds = ('com', )  # for now
    suggestions = []

    # Filter out a ton of garbage being submitted
    if len(search_terms) > 2:
        return

    # Filter out long words
    search_terms = [term for term in search_terms if len(term) < 30]

    # Filter out silly characters
    search_terms = [
        re.sub(r'[^a-zA-Z0-9\-]', '', term)
        for term in search_terms
    ]

    # Join the terms
    for joiner in joiners:
        suggestions.append(joiner.join(search_terms))

    # Add TLDs
    suggested_domains = []
    for tld in tlds:
        suggested_domains.extend(
            ['{}.{}'.format(s.lower(), tld) for s in suggestions])

    # Drop out duplicates
    suggested_domains = list(set(suggested_domains))

    # Filter for those that are actually valid domains. Materialize with
    # list() so the len() check below also works on Python 3, where
    # filter() returns a lazy iterator without a length.
    valid_suggestions = list(
        filter(dnstwist.validate_domain, suggested_domains))

    if len(valid_suggestions) == 0:
        return

    return random.choice(valid_suggestions)
def test_parse_domain(self):
    """Tests of the helper that decodes and validates a domain.

    Function returns a valid domain or None.
    """
    # Inputs that must be rejected, paired with their assertion messages.
    for data, message in (
        ('', 'Missing hex data should return None'),
        (None, 'Non-hex-decodable data should return None'),
        ('he378a -- ?', 'Non-hex-decodable data should return None'),
    ):
        self.assertIs(None, tools.parse_domain(data), message)

    # A hex-decodable payload that is not a valid domain is also rejected.
    bad_domain = '\\www.z.comasfff'
    self.assertFalse(
        dnstwist.validate_domain(bad_domain),
        'Bad domain should be invalid'
    )
    self.assertIs(
        None,
        tools.parse_domain(binascii.hexlify(bad_domain)),
        'hex-decodable (but invalid) domain data should return None'
    )

    # A valid domain is returned whether hex- or (legacy) b64-encoded.
    domain = 'www.example.com'
    self.assertTrue(
        dnstwist.validate_domain(domain),
        'Good domain should be valid'
    )
    for encoded, message in (
        (binascii.hexlify(domain),
         'hex-decodable valid domain data should be returned'),
        (base64.b64encode(domain),
         'Old b64-style domain data is also processable.'),
    ):
        self.assertEqual(
            'www.example.com', tools.parse_domain(encoded), message)
def resolve(domain):
    """Resolves a domain to an IP.

    Returns (IP, False) on successful resolution, (False, False) on
    successful failure to resolve and (None, True) on error in
    attempting to resolve.

    Cached to 1 hour.
    """
    # Use truthiness rather than 'is None': elsewhere in this codebase
    # validate_domain is treated as returning a falsy value for invalid
    # domains, and a False result would slip past an 'is None' check.
    if not dnstwist.validate_domain(domain):
        return False, True

    # Try for an 'A' record.
    try:
        ip_addr = str(sorted(RESOLVER.query(domain, 'A'))[0].address)
        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        # Best-effort: any DNS failure falls through to the next method.
        pass

    # Try for a simple resolution if the 'A' record request failed
    try:
        ip_addr = socket.gethostbyname(domain)
        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        pass

    # Fall back to the external resolution helper.
    return google_resolve(domain)
def _base64_redirect(encoded_domain):
    """Try to parse a domain into base64, return a redirect to the hex
    version if successful, otherwise None.

    :param encoded_domain: possibly b64-encoded domain from an old-style URL.
    :returns: redirect path string, or None.
    """
    try:
        decoded_domain = base64.b64decode(encoded_domain)
    except Exception:
        # Not valid base64 (TypeError on Python 2, binascii.Error on
        # Python 3); no redirect possible.
        return
    if dnstwist.validate_domain(decoded_domain):
        return '/atom/{}'.format(binascii.hexlify(decoded_domain))
def note(self, domain):
    """Record that the domains have appeared in a delta report.

    We increment each time we note, and move the expiry forward to the
    chosen number of seconds. That gives us a sliding window of changes
    over the period.
    """
    if not dnstwist.validate_domain(domain):
        return
    # Batch the counter increment and expiry bump into one round trip.
    pipeline = self.r_conn.pipeline()
    pipeline.incr(domain)
    pipeline.expire(domain, EXPIRY)
    pipeline.execute()
def parse_domain(encoded_domain):
    """Given a hex-encoded string, try to decode and validate it and
    if it is a valid domain, return it.

    Return None on un-decodable or invalid domain.
    """
    try:
        decoded_domain = binascii.unhexlify(encoded_domain)
    except (TypeError, binascii.Error):
        # Not hex-decodable: odd-length or non-hex input raises
        # TypeError on Python 2 and binascii.Error on Python 3.
        return
    if dnstwist.validate_domain(decoded_domain):
        return decoded_domain.lower()
def get_delta_domains():
    """Return a list of all the domains in all the delta reports.

    If this stops scaling I'll switch to an iterator off a DB query.
    """
    url = os.getenv('DELTAS_URL')
    if url is None:
        raise Exception('Delta report URL configuration not set!')
    # 'payload' rather than 'json' to avoid shadowing the module name.
    payload = requests.get(url, timeout=10).json()
    domains = []
    for (domain, ) in payload['values']:
        if dnstwist.validate_domain(domain):
            domains.append(domain)
    return domains
def parse_domain(hexdomain):
    """Given a plain, b64- or hex-encoded string, try to return a domain.

    Return None on invalid domain.
    """
    # Plain-text domains pass straight through.
    try:
        if dnstwist.validate_domain(hexdomain):
            return hexdomain
    except Exception:
        pass

    # The current scheme: hex-encoded domains.
    try:
        domain = binascii.unhexlify(hexdomain)
    except (TypeError, binascii.Error):
        # Also catch binascii.Error: on Python 3 unhexlify/b64decode
        # raise it (not TypeError) on malformed input, which would
        # otherwise crash here.
        try:
            # Old style URLs
            domain = base64.b64decode(hexdomain)
        except (TypeError, binascii.Error):
            return
    if not dnstwist.validate_domain(domain):
        return
    return domain.lower()
def parse_domain(encoded_domain):
    """Given a plain, b64- or hex-encoded string, try to decode and
    validate it and if it is valid, return it.

    Return None on un-decodable or invalid domain.
    """
    # Decoding schemes, in the order we expect to see them.
    for decode in (
        str,                 # Plain text (breaks on a lot of firewalls).
        binascii.unhexlify,  # The current hex-encoding scheme.
        base64.b64decode,    # The predecessor to the hex version.
    ):
        try:
            candidate = decode(encoded_domain)
            if dnstwist.validate_domain(candidate):
                return candidate.lower()
        except:
            # Deliberate best-effort: a failed decode just means we try
            # the next scheme.
            pass
def resolve(domain):
    """Resolves a domain to an IP.

    Returns (IP, False) on successful resolution, (False, False) on
    successful failure to resolve and (False, True) on error in
    attempting to resolve.

    Cached to 1 hour.
    """
    # Use truthiness rather than 'is None': elsewhere in this codebase
    # validate_domain is treated as returning a falsy value for invalid
    # domains, and a False result would slip past an 'is None' check.
    if not dnstwist.validate_domain(domain):
        return False, True

    idna_domain = domain.encode('idna')

    # Try for an 'A' record.
    try:
        ip_addr = str(sorted(RESOLVER.query(idna_domain, 'A'))[0].address)
        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        # Best-effort: fall through to plain resolution on any failure.
        pass

    # Try for a simple resolution if the 'A' record request failed
    try:
        ip_addr = socket.gethostbyname(idna_domain)
        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except socket.gaierror:
        # Indicates failure to resolve to IP address, not an error in
        # the attempt.
        return False, False
    except Exception:
        pass

    # Error due to exception or the 127.0.0.1 issue.
    return False, True
def process_domain(domain):
    """Process a domain - generating resolution reports and deltas.

    Steps, in order:
      1. Unregister the domain if it no longer validates.
      2. Unregister it if its delta report has gone unread for
         PERIOD * UNREGISTER seconds.
      3. Skip it if its delta report was updated within the last PERIOD
         seconds.
      4. Otherwise resolve all fuzzed variants, store the resolution
         report, and diff it against the previous one into a delta
         report of new / updated / deleted entries.
    """
    # NOTE(review): this only rejects a None result; if validate_domain
    # can also return False for invalid input (as other call sites here
    # assume), that case slips through — confirm.
    if dnstwist.validate_domain(domain) is None:
        print 'Invalid: {}'.format(repr(domain))
        repository.unregister_domain(domain)
        return

    # Unregister long-time unread domains
    last_read = repository.delta_report_last_read(domain)
    if last_read is None:
        # Never read: start the clock now instead of unregistering.
        repository.mark_delta_report_as_read(domain)
    else:
        age = datetime.datetime.now() - last_read
        if age > datetime.timedelta(seconds=PERIOD * UNREGISTER):
            print 'Expired: {}'.format(domain.encode('idna'))
            repository.unregister_domain(domain)
            return

    # Skip domains that have been recently updated
    delta_last_updated = repository.delta_report_updated(domain)
    if delta_last_updated is not None:
        age = datetime.datetime.now() - delta_last_updated
        if age < datetime.timedelta(seconds=PERIOD):
            print 'Skipping: {}'.format(domain.encode('idna'))
            return

    start = time.time()

    existing_report = repository.get_resolution_report(domain)
    if existing_report is None:
        existing_report = {}

    # Build a fresh resolution report from the fuzzed variants;
    # [1:] skips the first entry (the domain itself).
    new_report = {}
    for entry in tools.analyse(domain)[1]['fuzzy_domains'][1:]:
        ip_addr, error = tools.resolve(entry['domain-name'])
        # Only keep variants that actually resolved without error.
        if error or not ip_addr or ip_addr is None:
            continue
        new_report[entry['domain-name']] = {
            'ip': ip_addr,
            'tweak': entry['fuzzer'],
        }
    repository.update_resolution_report(domain, new_report)

    # Diff the new report against the previous one.
    delta_report = {'new': [], 'updated': [], 'deleted': []}
    for (dom, data) in new_report.items():
        try:
            new_ip = data['ip']
        except TypeError:
            # handle old-style ip-only reports
            new_ip = data
        if dom in existing_report.keys():
            try:
                existing_ip = existing_report[dom]['ip']
            except TypeError:
                # handle old-style ip-only reports
                existing_ip = existing_report[dom]
            if new_ip != existing_ip:
                delta_report['updated'].append((dom, existing_ip, new_ip))
        else:
            delta_report['new'].append((dom, new_ip))
    # Anything in the old report but not the new one has gone away.
    for dom in existing_report.keys():
        if dom not in new_report.keys():
            delta_report['deleted'].append(dom)
    repository.update_delta_report(domain, delta_report)

    print 'Updated {} in {} seconds'.format(
        domain.encode('idna'), time.time() - start)
def process_domain(domain):
    """Process a domain - generating resolution reports and deltas.

    Steps, in order:
      1. Unregister the domain if it no longer validates.
      2. Unregister it if its delta report has gone unread for
         PERIOD * UNREGISTER seconds.
      3. Skip it if its delta report was updated within the last PERIOD
         seconds.
      4. Otherwise resolve all fuzzed variants, store the resolution
         report, and diff it against the previous one into a delta
         report of new / updated / deleted entries.
    """
    # NOTE(review): this only rejects a None result; if validate_domain
    # can also return False for invalid input (as other call sites here
    # assume), that case slips through — confirm.
    if dnstwist.validate_domain(domain) is None:
        print 'Unregistering (invalid) {}'.format(domain)
        repository.unregister_domain(domain)
        return

    # Unregister long-time unread domains
    last_read = repository.delta_report_last_read(domain)
    if last_read is None:
        # Never read: start the clock now instead of unregistering.
        repository.mark_delta_report_as_read(domain)
    else:
        age = datetime.datetime.now() - last_read
        if age > datetime.timedelta(seconds=PERIOD*UNREGISTER):
            print 'Unregistering (not read > 7 days) {}'.format(domain)
            repository.unregister_domain(domain)
            return

    # Skip domains that have been recently updated
    delta_last_updated = repository.delta_report_updated(domain)
    if delta_last_updated is not None:
        age = datetime.datetime.now() - delta_last_updated
        if age < datetime.timedelta(seconds=PERIOD):
            print 'Skipping (recently updated) {}'.format(domain)
            return

    start = time.time()

    existing_report = repository.get_resolution_report(domain)
    if existing_report is None:
        existing_report = {}

    # Build a fresh resolution report from the fuzzed variants;
    # [1:] skips the first entry (the domain itself).
    new_report = {}
    for entry in tools.analyse(domain)[1]['fuzzy_domains'][1:]:
        ip, error = tools.resolve(entry['domain-name'])
        # Only keep variants that actually resolved without error.
        if error or not ip or ip is None:
            continue
        new_report[entry['domain-name']] = {
            'ip': ip,
            'tweak': entry['fuzzer'],
        }
    repository.update_resolution_report(domain, new_report)

    # Diff the new report against the previous one.
    delta_report = {'new': [], 'updated': [], 'deleted': []}
    for (dom, data) in new_report.items():
        try:
            new_ip = data['ip']
        except TypeError:
            # handle old-style ip-only reports
            new_ip = data
        if dom in existing_report.keys():
            try:
                existing_ip = existing_report[dom]['ip']
            except TypeError:
                # handle old-style ip-only reports
                existing_ip = existing_report[dom]
            if new_ip != existing_ip:
                delta_report['updated'].append(
                    (dom, existing_ip, new_ip)
                )
        else:
            delta_report['new'].append((dom, new_ip))
    # Anything in the old report but not the new one has gone away.
    for dom in existing_report.keys():
        if dom not in new_report.keys():
            delta_report['deleted'].append(dom)
    repository.update_delta_report(domain, delta_report)

    print 'Updated deltas for {} in {} seconds'.format(
        domain, time.time() - start
    )