def suggest_domain(search_domain):
    """Propose a plausible domain for a free-text search string.

    The input is split on single spaces. Returns a candidate domain string
    that ``Domain.try_parse`` accepts, or None when nothing usable can be
    built from the input.
    """
    words = search_domain.split(' ')
    word_count = len(words)

    # A lone term: the usual typo is a comma (or slash / hyphen) typed where
    # the period between the second- and top-level domain should be.
    if word_count == 1:
        dotted = re.sub(r'[,/-]', '.', words[0])
        if Domain.try_parse(dotted) is not None:
            return dotted

    # Two terms where the second is a known TLD: treat the space as a dot.
    if word_count == 2 and words[1] in tld_db.TLDS:
        dotted = '.'.join(words)
        if Domain.try_parse(dotted) is not None:
            return dotted

    # Anything with more than two terms is treated as garbage.
    if word_count > 2:
        return None

    # Discard over-long words, then strip every character that is not a
    # letter, digit or hyphen from the words that remain.
    cleaned = []
    for word in words:
        if len(word) >= 30:
            continue
        cleaned.append(re.sub(r'[^a-zA-Z0-9\-]', '', word))

    # Glue the words together with each joiner (for now just '' and '-';
    # '.' is also being trialled) and append each TLD (for now just 'com').
    stems = [joiner.join(cleaned).lower() for joiner in ('', '-')]
    candidates = list(
        {'{}.{}'.format(stem, tld) for tld in ('com', ) for stem in stems})

    # Keep only the candidates that are actually valid domains.
    valid = [c for c in candidates if Domain.try_parse(c) is not None]
    if not valid:
        return None

    return random.choice(valid)
def try_parse_domain_from_hex(hex_encoded_ascii_domain):
    """Decode a hex-encoded ASCII string and parse it as a domain.

    Returns None when the argument cannot be hex-decoded or the decoded
    bytes are not ASCII; otherwise returns whatever ``Domain.try_parse``
    returns for the decoded text.
    """
    try:
        raw_bytes = bytes.fromhex(hex_encoded_ascii_domain)
        decoded_text = raw_bytes.decode('ascii')
    except (ValueError, TypeError):
        # ValueError covers bad hex digits and non-ASCII bytes
        # (UnicodeDecodeError is a ValueError); TypeError covers inputs
        # such as None.
        return None
    else:
        return Domain.try_parse(decoded_text)
def domain_to_hex(domain_param):
    """Return a JSON payload carrying the hexadecimal form of a domain.

    Aborts the request with HTTP 400 when ``domain_param`` does not parse
    as a domain.
    """
    parsed = Domain.try_parse(domain_param)
    if parsed is None:
        flask.abort(400, 'Malformed domain.')

    as_hex = parsed.to_hex()

    # Start from the standard API fields, then add this endpoint's own value.
    response_body = standard_api_values(parsed, skip='domain_to_hex')
    response_body['domain_as_hexadecimal'] = as_hex
    return flask.jsonify(response_body)
def test_parse_domain(self):
    """Tests of the helper that decodes and validates a domain.

    Function returns a valid domain or None.
    """
    # Inputs that cannot be hex-decoded at all.
    self.assertIsNone(
        tools.try_parse_domain_from_hex(''),
        'Missing hex data should return None')

    self.assertIsNone(
        tools.try_parse_domain_from_hex(None),
        'Non-hex-decodable data should return None')

    self.assertIsNone(
        tools.try_parse_domain_from_hex('he378a -- ?'),
        'Non-hex-decodable data should return None')

    # Sanity-check the fixtures directly against the parser.
    bad_domain = '\\www.z.comasfff'
    self.assertIsNone(
        Domain.try_parse(bad_domain), 'Bad domain should be invalid')

    long_bad_domain = 'www.zsssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssszssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss.com'
    self.assertIsNone(
        Domain.try_parse(long_bad_domain), 'Long domain should be invalid')

    # hexlify() returns bytes; decode to str (as the valid-domain case below
    # does) so the helper receives hex text and the result reflects domain
    # validation rather than a rejection of the argument's type.
    bad_domain_data = binascii.hexlify(bad_domain.encode()).decode()
    self.assertIsNone(
        tools.try_parse_domain_from_hex(bad_domain_data),
        'hex-decodable (but invalid) domain data should return None')

    # A well-formed domain survives the hex round trip.
    domain = 'www.example.com'
    self.assertIsNotNone(
        Domain.try_parse(domain), 'Good domain should be valid')

    domain_data = binascii.hexlify(domain.encode()).decode()
    self.assertEqual(
        'www.example.com',
        tools.try_parse_domain_from_hex(domain_data),
        'hex-decodable valid domain data should be returned')
def __filter_domains(self):
    """Prune ``self.domains`` to parseable, unique entries.

    Each entry's 'domain-name' value is parsed; entries that fail to parse
    are dropped, as are entries whose unicode form has already been seen.
    The first occurrence wins and the original order is kept.
    """
    kept_entries = []
    known_names = set()

    for entry in self.domains:
        parsed = Domain.try_parse(entry['domain-name'])
        if parsed is None:
            continue

        unicode_name = parsed.to_unicode()
        if unicode_name not in known_names:
            known_names.add(unicode_name)
            kept_entries.append(entry)

    self.domains = kept_entries
def search_post():
    """Process the submitted search form and redirect accordingly.

    Redirects to '/error/2' when the 'domains' field is missing or blank,
    delegates to ``handle_invalid_domain`` (with the hex-encoded raw input)
    when the value does not parse as a domain, and otherwise redirects to
    the search page for the parsed domain's hex form.
    """
    form = flask.request.form
    try:
        post_data = form['domains']
    except KeyError:
        app.logger.info(
            'Missing "domains" key from POST: {}'.format(flask.request.form))
        return flask.redirect('/error/2')

    if post_data is None or not post_data.strip():
        app.logger.info('No data in "domains" key in POST')
        return flask.redirect('/error/2')

    searched_domain = Domain.try_parse(post_data.strip())
    if searched_domain is not None:
        return flask.redirect(
            '/search/{}'.format(searched_domain.to_hex()))

    # The raw (un-stripped) submission is hex-encoded for the invalid-domain
    # handler.
    raw_hex = binascii.hexlify(post_data.encode()).decode('ascii')
    return handle_invalid_domain(raw_hex)
def test_encode_bonkers_unicode():
    """A domain containing a lone surrogate code point must not parse."""
    lone_surrogate_domain = u'a\uDFFFa.com'
    parsed = Domain.try_parse(lone_surrogate_domain)
    assert parsed is None
def get_domain(url):
    """Return the domain from a URL.

    The network-location part of ``url`` is reduced to its host by removing
    any ``user:password@`` prefix and any ``:port`` suffix, and the host is
    handed to ``Domain.try_parse``, whose result is returned.
    """
    netloc = urllib.parse.urlparse(url).netloc

    # netloc can include userinfo ("user:pass@host:port"). Drop everything
    # up to the last '@' first, otherwise splitting on ':' would yield the
    # username rather than the host.
    host_and_port = netloc.rpartition('@')[2]

    return Domain.try_parse(host_and_port.split(':')[0])
def clean_up_search_term(search_term):
    """Remove HTTP(s) schemes and trailing slashes.

    A leading ``http://`` or ``https://`` (in any letter case) and a
    trailing ``/`` are removed from ``search_term``; the remainder is passed
    to ``Domain.try_parse`` and that result is returned.
    """
    # The flag must be passed by keyword: the fourth positional parameter of
    # re.sub() is ``count``, so a positional re.IGNORECASE would be taken as
    # a substitution limit and the match would stay case-sensitive.
    search_term = re.sub(
        r'(^http(s)?://)|(/$)', '', search_term, flags=re.IGNORECASE)
    return Domain.try_parse(search_term)
def test_can_try_parse():
    """try_parse gives None for junk and an equal Domain for valid input."""
    junk_result = Domain.try_parse('?@?!#?!?23//d/sad')
    assert junk_result is None

    punycode_name = 'xn--w5a.com'
    assert Domain.try_parse(punycode_name) == Domain(punycode_name)