Beispiel #1
0
def suggest_domain(search_domain):
    """Suggest a domain based on the search fields.

    The search text is split on single spaces and handled as follows:

    * one term: ``,``, ``/`` and ``-`` are replaced with ``.`` and the result
      is returned if it parses as a domain;
    * two terms where the second is a known TLD: the terms are joined with
      ``.`` and the result is returned if it parses as a domain;
    * more than two terms: treated as garbage, nothing is suggested;
    * otherwise the terms are cleaned up, glued together and given a ``.com``
      suffix, and one valid candidate is picked at random.

    Returns the suggested domain as a string, or None when there is nothing
    sensible to suggest.
    """
    terms = search_domain.split(' ')
    term_count = len(terms)

    # Filter out a ton of garbage being submitted. None of the quick fixes
    # below apply to more than two terms, so bail out straight away.
    if term_count > 2:
        return None

    # Check for a simple common typo first - putting comma instead of period
    # in-between the second- and top-level domains.
    if term_count == 1:
        dotted = re.sub(r'[,/-]', '.', terms[0])
        if Domain.try_parse(dotted) is not None:
            return dotted

    # Pick up space-separated domain levels.
    if term_count == 2 and terms[1] in tld_db.TLDS:
        dotted = '.'.join(terms)
        if Domain.try_parse(dotted) is not None:
            return dotted

    # Attempt to make a domain from the terms: drop long words (measured
    # before clean-up) and strip silly characters from the rest.
    cleaned_terms = [
        re.sub(r'[^a-zA-Z0-9\-]', '', term)
        for term in terms
        if len(term) < 30
    ]

    glue_options = ('', '-')  # for now, also trialling ('', '-', '.')
    tld_options = ('com', )  # for now

    # Join the terms, then add TLDs.
    stems = [glue.join(cleaned_terms) for glue in glue_options]
    candidates = [
        '{}.{}'.format(stem.lower(), tld)
        for tld in tld_options
        for stem in stems
    ]

    # Drop out duplicates, then keep only those that are actually valid
    # domains.
    usable = [
        candidate for candidate in list(set(candidates))
        if Domain.try_parse(candidate) is not None
    ]

    return random.choice(usable) if usable else None
Beispiel #2
0
def try_parse_domain_from_hex(hex_encoded_ascii_domain):
    """Decode a hex-encoded ASCII domain and parse it.

    Returns None when the input cannot be hex-decoded or the decoded bytes
    are not ASCII; otherwise returns whatever ``Domain.try_parse`` gives for
    the decoded text.
    """
    try:
        raw_bytes = bytes.fromhex(hex_encoded_ascii_domain)
        # UnicodeDecodeError is a ValueError, so non-ASCII bytes are handled
        # by the same except clause as malformed hex.
        decoded_text = raw_bytes.decode('ascii')
    except (TypeError, ValueError):
        return None
    else:
        return Domain.try_parse(decoded_text)
Beispiel #3
0
def domain_to_hex(domain_param):
    """Return a JSON response containing the hex form of a domain.

    Aborts the request with a 400 when ``domain_param`` does not parse as a
    domain.
    """
    parsed = Domain.try_parse(domain_param)
    if parsed is None:
        flask.abort(400, 'Malformed domain.')

    hex_form = parsed.to_hex()

    # Start from the standard API values, skipping this endpoint's own entry,
    # then attach the hex representation.
    response_body = standard_api_values(parsed, skip='domain_to_hex')
    response_body['domain_as_hexadecimal'] = hex_form

    return flask.jsonify(response_body)
Beispiel #4
0
    def test_parse_domain(self):
        """Tests of the helper that decodes and validates a domain.

        Function returns a valid domain or None.
        """
        self.assertIsNone(tools.try_parse_domain_from_hex(''),
                          'Missing hex data should return None')

        self.assertIsNone(tools.try_parse_domain_from_hex(None),
                          'Non-hex-decodable data should return None')
        self.assertIsNone(tools.try_parse_domain_from_hex('he378a -- ?'),
                          'Non-hex-decodable data should return None')

        bad_domain = '\\www.z.comasfff'
        self.assertIsNone(
            Domain.try_parse(bad_domain),
            'Bad domain should be invalid')

        long_bad_domain = 'www.zsssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssszssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss.com'
        self.assertIsNone(
            Domain.try_parse(long_bad_domain),
            'Long domain should be invalid')

        # hexlify() returns bytes; decode to str so the helper receives the
        # same kind of input as the valid-domain case below. Passing bytes
        # can be rejected by bytes.fromhex() before any domain validation
        # happens, which would make this assertion pass for the wrong reason.
        bad_domain_data = binascii.hexlify(bad_domain.encode()).decode()
        self.assertIsNone(
            tools.try_parse_domain_from_hex(bad_domain_data),
            'hex-decodable (but invalid) domain data should return None')

        domain = 'www.example.com'
        self.assertIsNotNone(
            Domain.try_parse(domain),
            'Good domain should be valid')

        domain_data = binascii.hexlify(domain.encode()).decode()
        self.assertEqual('www.example.com',
                         tools.try_parse_domain_from_hex(domain_data),
                         'hex-decodable valid domain data should be returned')
Beispiel #5
0
    def __filter_domains(self):
        """Reduce ``self.domains`` to parseable, non-duplicate entries.

        Entries whose 'domain-name' does not parse are dropped, as are
        entries whose unicode form has already been seen. The first
        occurrence wins and the original order is kept.
        """
        unicode_names_seen = set()
        kept_entries = []

        for entry in self.domains:
            parsed = Domain.try_parse(entry['domain-name'])
            if parsed is None:
                continue

            unicode_name = parsed.to_unicode()
            if unicode_name not in unicode_names_seen:
                unicode_names_seen.add(unicode_name)
                kept_entries.append(entry)

        self.domains = kept_entries
Beispiel #6
0
def search_post():
    """Handle form submit.

    Redirects to '/error/2' when the 'domains' field is missing or blank,
    to the search page for the hex-encoded domain when the field parses as
    a domain, and otherwise defers to ``handle_invalid_domain``.
    """
    try:
        submitted = flask.request.form['domains']
    except KeyError:
        app.logger.info(
            'Missing "domains" key from POST: {}'.format(flask.request.form))
        return flask.redirect('/error/2')

    trimmed = '' if submitted is None else submitted.strip()
    if trimmed == '':
        app.logger.info('No data in "domains" key in POST')
        return flask.redirect('/error/2')

    parsed = Domain.try_parse(trimmed)
    if parsed is not None:
        return flask.redirect('/search/{}'.format(parsed.to_hex()))

    # The submission as received (not the trimmed copy) is hex-encoded for
    # the invalid-domain handler.
    raw_as_hex = binascii.hexlify(submitted.encode()).decode('ascii')
    return handle_invalid_domain(raw_as_hex)
Beispiel #7
0
def test_encode_bonkers_unicode():
    """A name containing a lone surrogate (U+DFFF) must not parse."""
    lone_surrogate_domain = u'a\uDFFFa.com'
    parsed = Domain.try_parse(lone_surrogate_domain)
    assert parsed is None
Beispiel #8
0
def get_domain(url):
    """Return the domain from a URL.

    The host is taken from the URL's network location with any
    ``user:password@`` prefix and ``:port`` suffix removed, then passed to
    ``Domain.try_parse``, whose result is returned unchanged.
    """
    netloc = urllib.parse.urlparse(url).netloc

    # Drop userinfo before looking for a port: splitting
    # 'user:pw@example.com' on ':' alone would yield 'user', not the host.
    host_and_port = netloc.rpartition('@')[2]

    return Domain.try_parse(host_and_port.split(':')[0])
Beispiel #9
0
def clean_up_search_term(search_term):
    """Remove HTTP(s) schemes and trailing slashes, then parse as a domain.

    The scheme is matched case-insensitively. Returns whatever
    ``Domain.try_parse`` gives for the cleaned-up term.
    """
    # re.sub's fourth positional parameter is ``count``, so the flag has to
    # be passed by keyword; given positionally, re.IGNORECASE was taken as a
    # replacement limit and upper-case schemes were left in place.
    search_term = re.sub(
        r'(^http(s)?://)|(/$)', '', search_term, flags=re.IGNORECASE)

    return Domain.try_parse(search_term)
def test_can_try_parse():
    """try_parse gives None for garbage and an equal Domain for a valid name."""
    garbage = '?@?!#?!?23//d/sad'
    assert Domain.try_parse(garbage) is None

    punycode_name = 'xn--w5a.com'
    assert Domain.try_parse(punycode_name) == Domain(punycode_name)