Exemple #1
0
    def test_parse_domain(self):
        """Exercise the helper that decodes and validates a domain.

        The helper is expected to hand back a valid domain, or None.
        """
        # Empty input: there is nothing to decode.
        self.assertIs(None, tools.parse_domain(''),
                      'Missing hex data should return None')

        # Inputs that are not hex at all.
        for undecodable in (None, 'he378a -- ?'):
            self.assertIs(None, tools.parse_domain(undecodable),
                          'Non-hex-decodable data should return None')

        # Hex that decodes cleanly, but to something that is not a domain.
        invalid = '\\www.z.comasfff'
        self.assertFalse(dnstwist.validate_domain(invalid),
                         'Bad domain should be invalid')
        invalid_hex = binascii.hexlify(invalid)
        self.assertIs(
            None, tools.parse_domain(invalid_hex),
            'hex-decodable (but invalid) domain data should return None')

        # Hex that decodes to a well-formed domain is returned as that domain.
        valid = 'www.example.com'
        self.assertTrue(dnstwist.validate_domain(valid),
                        'Good domain should be valid')
        valid_hex = binascii.hexlify(valid)
        self.assertEqual('www.example.com', tools.parse_domain(valid_hex),
                         'hex-decodable valid domain data should be returned')
Exemple #2
0
def suggest_domain(search_terms):
    """Suggest a domain based on the search fields.

    ``search_terms`` is a sequence of strings. Returns a single suggested
    domain string, or None when no suggestion can be made (including whenever
    more than two terms are supplied).
    """
    term_count = len(search_terms)

    # Filter out a ton of garbage being submitted: more than two terms never
    # yields a suggestion.
    if term_count > 2:
        return None

    # A lone term may simply be a typo'd domain: ',', '/' or '-' typed where
    # a '.' between the domain levels was intended.
    if term_count == 1:
        dotted = re.sub(r'[,/-]', '.', search_terms[0])
        if dnstwist.validate_domain(dotted):
            return dotted

    # Two terms where the second is a known TLD: space-separated levels.
    if term_count == 2 and search_terms[1] in tld_db.TLDS:
        dotted = '.'.join(search_terms)
        if dnstwist.validate_domain(dotted):
            return dotted

    # Otherwise attempt to build a domain out of the terms themselves.
    joiners = ('', '-') # for now, also trialling ('', '-', '.')
    tlds = ('com',)  # for now

    # Drop over-long words, then strip every character that is not a letter,
    # digit or hyphen from the words that remain.
    cleaned = [
        re.sub(r'[^a-zA-Z0-9\-]', '', term)
        for term in search_terms
        if len(term) < 30
    ]

    stems = [joiner.join(cleaned) for joiner in joiners]

    # Attach each TLD, lower-casing the stem, and de-duplicate.
    unique_candidates = list(set(
        '{}.{}'.format(stem.lower(), tld)
        for tld in tlds
        for stem in stems
    ))

    # Keep only the candidates that are actually valid domains.
    valid = [
        candidate
        for candidate in unique_candidates
        if dnstwist.validate_domain(candidate)
    ]
    if not valid:
        return None

    return random.choice(valid)
Exemple #3
0
def suggest_domain(search_domain):
    """Suggest a domain based on the search fields.

    ``search_domain`` is the raw search string; it is split on single spaces
    into terms. Returns one suggested domain string, or None when nothing
    suitable can be built (always the case for more than two terms).
    """
    words = search_domain.split(' ')

    # Filter out a ton of garbage being submitted.
    if len(words) > 2:
        return None

    if len(words) == 1:
        # Simple common typo: ',', '/' or '-' in place of the '.' between the
        # second- and top-level domains.
        fixed = re.sub(r'[,/-]', '.', words[0])
        if dnstwist.validate_domain(fixed):
            return fixed
    elif len(words) == 2 and words[1] in tld_db.TLDS:
        # Space-separated domain levels.
        fixed = '.'.join(words)
        if dnstwist.validate_domain(fixed):
            return fixed

    # Attempt to make a domain from the terms.
    joiners = ('', '-')  # for now, also trialling ('', '-', '.')
    tlds = ('com', )  # for now

    # Discard long words first, then remove silly characters from the rest.
    short_words = [word for word in words if len(word) < 30]
    clean_words = [re.sub(r'[^a-zA-Z0-9\-]', '', word) for word in short_words]

    # One stem per joiner, then one candidate per (tld, stem) pair.
    stems = [joiner.join(clean_words) for joiner in joiners]
    candidates = []
    for tld in tlds:
        for stem in stems:
            candidates.append('{}.{}'.format(stem.lower(), tld))

    # De-duplicate, then keep only what validates as a domain.
    valid = []
    for candidate in list(set(candidates)):
        if dnstwist.validate_domain(candidate):
            valid.append(candidate)

    if not valid:
        return None

    return random.choice(valid)
    def test_parse_domain(self):
        """Exercise the helper that decodes and validates a domain.

        The helper is expected to hand back a valid domain, or None.
        """
        # Empty input: there is nothing to decode.
        self.assertIs(
            None, tools.parse_domain(''),
            'Missing hex data should return None'
        )

        # Inputs that are not hex at all.
        for undecodable in (None, 'he378a -- ?'):
            self.assertIs(
                None, tools.parse_domain(undecodable),
                'Non-hex-decodable data should return None'
            )

        # Hex that decodes cleanly, but to something that is not a domain.
        invalid = '\\www.z.comasfff'
        self.assertFalse(
            dnstwist.validate_domain(invalid),
            'Bad domain should be invalid'
        )
        invalid_hex = binascii.hexlify(invalid)
        self.assertIs(
            None, tools.parse_domain(invalid_hex),
            'hex-decodable (but invalid) domain data should return None'
        )

        valid = 'www.example.com'
        self.assertTrue(
            dnstwist.validate_domain(valid),
            'Good domain should be valid'
        )

        # The current hex encoding round-trips to the original domain.
        as_hex = binascii.hexlify(valid)
        self.assertEqual(
            'www.example.com',
            tools.parse_domain(as_hex),
            'hex-decodable valid domain data should be returned'
        )

        # So does the older base64 encoding.
        as_b64 = base64.b64encode(valid)
        self.assertEqual(
            'www.example.com',
            tools.parse_domain(as_b64),
            'Old b64-style domain data is also processable.'
        )
Exemple #5
0
def resolve(domain):
    """Resolve a domain to an IP address.

    Returns an ``(ip, error)`` tuple:

    * ``(ip_addr, False)`` when either lookup below yields an address other
      than 127.0.0.1;
    * ``(False, True)`` when ``domain`` does not pass validation;
    * otherwise, whatever ``google_resolve(domain)`` returns.

    NOTE(review): the original notes say results are cached for 1 hour; the
    caching is not part of this function body - confirm where it is applied.
    """
    # Truthiness rather than ``is None``: every other caller treats any falsy
    # validation result as "invalid", not only None.
    if not dnstwist.validate_domain(domain):
        return False, True

    # Try for an 'A' record.
    try:
        ip_addr = str(sorted(RESOLVER.query(domain, 'A'))[0].address)

        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        # Best effort: any lookup failure just moves on to the next strategy.
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
        # propagate.
        pass

    # Try for a simple resolution if the 'A' record request failed
    try:
        ip_addr = socket.gethostbyname(domain)

        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        pass

    # Last resort: defer to the fallback resolver.
    return google_resolve(domain)
Exemple #6
0
def _base64_redirect(encoded_domain):
    """Build a redirect path for a base64-encoded domain.

    Decodes ``encoded_domain`` from base64 and, if the result validates as a
    domain, returns the path ``'/atom/<hex>'`` where ``<hex>`` is the
    hex-encoded form of the decoded domain. Returns None when the input
    cannot be decoded or does not validate.
    """
    try:
        decoded_domain = base64.b64decode(encoded_domain)
        if dnstwist.validate_domain(decoded_domain):
            return '/atom/{}'.format(binascii.hexlify(decoded_domain))
    except Exception:
        # Best effort: undecodable input simply means "no redirect".
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
        # propagate.
        pass

    return None
Exemple #7
0
    def note(self, domain):
        """Record one more appearance of ``domain`` in a delta report.

        Each call increments the counter stored under the domain and pushes
        its expiry out to ``EXPIRY`` again, which yields a sliding window of
        changes over that period. Domains that fail validation are ignored.
        """
        if not dnstwist.validate_domain(domain):
            return

        # Queue both commands on one pipeline and send them together.
        # NOTE(review): presumably a Redis connection - confirm.
        batch = self.r_conn.pipeline()
        batch.incr(domain)
        batch.expire(domain, EXPIRY)
        batch.execute()
Exemple #8
0
def parse_domain(encoded_domain):
    """Decode a hex-encoded domain and return it, lower-cased, if valid.

    Returns None when ``encoded_domain`` cannot be hex-decoded or when the
    decoded value does not validate as a domain.
    """
    try:
        decoded_domain = binascii.unhexlify(encoded_domain)
        if dnstwist.validate_domain(decoded_domain):
            return decoded_domain.lower()
    except Exception:
        # Best effort: anything that fails to decode or validate is treated
        # as "not a domain". ``Exception`` (not a bare except) so Ctrl-C /
        # SystemExit still propagate.
        pass

    return None
Exemple #9
0
def get_delta_domains():
    """Return a list of all the valid domains in all the delta reports.

    The report endpoint is read from the ``DELTAS_URL`` environment variable
    and fetched with a 10 second timeout. Each row under the response's
    ``'values'`` key is unpacked as a one-element sequence holding a domain;
    only domains that pass validation are returned.

    Raises ``Exception`` when ``DELTAS_URL`` is not set.

    If this stops scaling I'll switch to an iterator off a DB query.
    """
    deltas_url = os.getenv('DELTAS_URL')
    if deltas_url is None:
        raise Exception('Delta report URL configuration not set!')

    payload = requests.get(deltas_url, timeout=10).json()

    domains = []
    for (domain, ) in payload['values']:
        if dnstwist.validate_domain(domain):
            domains.append(domain)
    return domains
Exemple #10
0
def parse_domain(hexdomain):
    """Given a plain, b64- or hex-encoded string, try to return a domain.

    The input is tried, in order, as:

    1. a plain domain - returned unchanged if it validates;
    2. hex - decoded, validated and returned lower-cased;
    3. base64 (old style URLs), only if hex decoding fails - decoded,
       validated and returned lower-cased.

    Return None on un-decodable input or invalid domain.
    """
    try:
        if dnstwist.validate_domain(hexdomain):
            return hexdomain
    except Exception:
        # Validation may choke on non-string input; fall through to decoding.
        pass

    # Decoders signal bad input with TypeError or with binascii.Error (a
    # ValueError subclass on newer interpreters), so both are handled.
    try:
        domain = binascii.unhexlify(hexdomain)
    except (TypeError, ValueError):
        try:
            # Old style URLs
            domain = base64.b64decode(hexdomain)
        except (TypeError, ValueError):
            return None

    # Guard the second validation like the first, so that odd decoded bytes
    # give None rather than an exception.
    try:
        if not dnstwist.validate_domain(domain):
            return None
    except Exception:
        return None

    return domain.lower()
Exemple #11
0
def parse_domain(encoded_domain):
    """Given a plain, b64- or hex-encoded string, try to decode and validate
    it and if it is valid, return it.

    Each decoder is tried in turn; the first one whose output validates as a
    domain wins and that output is returned lower-cased.

    Return None on un-decodable or invalid domain.
    """
    decoders = (
        str,  # Plain text (breaks on a lot of firewalls).
        binascii.unhexlify,  # The current hex-encoding scheme.
        base64.b64decode,  # The predecessor to the hex version.
    )

    for decoder in decoders:
        try:
            decoded = decoder(encoded_domain)
            if dnstwist.validate_domain(decoded):
                return decoded.lower()
        except Exception:
            # This encoding did not fit; try the next one. ``Exception`` (not
            # a bare except) so Ctrl-C / SystemExit still propagate.
            continue

    return None
Exemple #12
0
def resolve(domain):
    """Resolve a domain to an IP address.

    Returns an ``(ip, error)`` tuple:

    * ``(ip_addr, False)`` on successful resolution to an address other than
      127.0.0.1;
    * ``(False, False)`` when the simple lookup reports that the name does
      not resolve (``socket.gaierror``);
    * ``(False, True)`` when ``domain`` does not pass validation, or when the
      attempts failed for another reason or produced only 127.0.0.1.

    Lookups are made against the IDNA-encoded form of ``domain``.

    NOTE(review): the original notes say results are cached for 1 hour; the
    caching is not part of this function body - confirm where it is applied.
    """
    # Truthiness rather than ``is None``: every other caller treats any falsy
    # validation result as "invalid", not only None.
    if not dnstwist.validate_domain(domain):
        return False, True

    idna_domain = domain.encode('idna')

    # Try for an 'A' record.
    try:
        ip_addr = str(sorted(RESOLVER.query(idna_domain, 'A'))[0].address)

        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except Exception:
        # Best effort: fall through to the simple lookup. ``Exception`` (not
        # a bare except) so Ctrl-C / SystemExit still propagate.
        pass

    # Try for a simple resolution if the 'A' record request failed
    try:
        ip_addr = socket.gethostbyname(idna_domain)

        # Weird edge case that sometimes happens?!?!
        if ip_addr != '127.0.0.1':
            return ip_addr, False
    except socket.gaierror:
        # Indicates failure to resolve to IP address, not an error in
        # the attempt.
        return False, False
    except Exception:
        pass

    # Error due to exception or the 127.0.0.1 issue.
    return False, True
Exemple #13
0
def process_domain(domain):
    """Process a domain - generating resolution reports and deltas.

    In order:

    1. An invalid domain is unregistered and nothing else happens.
    2. A domain whose delta report was last read more than
       ``PERIOD * UNREGISTER`` seconds ago is unregistered; one that has
       never been read is marked as read now.
    3. A domain whose delta report was updated less than ``PERIOD`` seconds
       ago is skipped.
    4. Otherwise every fuzzy variant of the domain is resolved, the
       resolution report is replaced with the fresh results, and a delta
       report ('new' / 'updated' / 'deleted') against the previous
       resolution report is stored.

    Progress is written to stdout. Returns None.
    """
    # Truthiness rather than ``is None``: every other caller treats any falsy
    # validation result as "invalid", not only None.
    if not dnstwist.validate_domain(domain):
        print('Invalid: {}'.format(repr(domain)))
        repository.unregister_domain(domain)
        return

    # Unregister long-time unread domains
    last_read = repository.delta_report_last_read(domain)
    if last_read is None:
        repository.mark_delta_report_as_read(domain)
    else:
        age = datetime.datetime.now() - last_read
        if age > datetime.timedelta(seconds=PERIOD * UNREGISTER):
            print('Expired: {}'.format(domain.encode('idna')))
            repository.unregister_domain(domain)
            return

    # Skip domains that have been recently updated
    delta_last_updated = repository.delta_report_updated(domain)
    if delta_last_updated is not None:
        age = datetime.datetime.now() - delta_last_updated
        if age < datetime.timedelta(seconds=PERIOD):
            print('Skipping: {}'.format(domain.encode('idna')))
            return

    start = time.time()

    existing_report = repository.get_resolution_report(domain)
    if existing_report is None:
        existing_report = {}

    # Resolve every fuzzy variant, skipping the first entry.
    # NOTE(review): presumably the first entry is the domain itself - confirm.
    new_report = {}
    for entry in tools.analyse(domain)[1]['fuzzy_domains'][1:]:
        ip_addr, error = tools.resolve(entry['domain-name'])
        # ``not ip_addr`` already covers None, False and ''.
        if error or not ip_addr:
            continue
        new_report[entry['domain-name']] = {
            'ip': ip_addr,
            'tweak': entry['fuzzer'],
        }

    repository.update_resolution_report(domain, new_report)

    delta_report = {'new': [], 'updated': [], 'deleted': []}
    for (dom, data) in new_report.items():
        # Entries of new_report were built just above, so always dicts.
        new_ip = data['ip']

        # Direct dict membership: ``in d.keys()`` scans a list on Python 2.
        if dom not in existing_report:
            delta_report['new'].append((dom, new_ip))
            continue

        try:
            existing_ip = existing_report[dom]['ip']
        except TypeError:
            # handle old-style ip-only reports
            existing_ip = existing_report[dom]

        if new_ip != existing_ip:
            delta_report['updated'].append((dom, existing_ip, new_ip))

    # Anything that resolved previously but no longer does.
    delta_report['deleted'] = [
        dom for dom in existing_report if dom not in new_report
    ]

    repository.update_delta_report(domain, delta_report)

    print('Updated {} in {} seconds'.format(domain.encode('idna'),
                                            time.time() - start))
def process_domain(domain):
    """Process a domain - generating resolution reports and deltas.

    In order:

    1. An invalid domain is unregistered and nothing else happens.
    2. A domain whose delta report was last read more than
       ``PERIOD * UNREGISTER`` seconds ago is unregistered; one that has
       never been read is marked as read now.
    3. A domain whose delta report was updated less than ``PERIOD`` seconds
       ago is skipped.
    4. Otherwise every fuzzy variant of the domain is resolved, the
       resolution report is replaced with the fresh results, and a delta
       report ('new' / 'updated' / 'deleted') against the previous
       resolution report is stored.

    Progress is written to stdout. Returns None.
    """
    # Truthiness rather than ``is None``: every other caller treats any falsy
    # validation result as "invalid", not only None.
    if not dnstwist.validate_domain(domain):
        print('Unregistering (invalid) {}'.format(domain))
        repository.unregister_domain(domain)
        return

    # Unregister long-time unread domains
    last_read = repository.delta_report_last_read(domain)
    if last_read is None:
        repository.mark_delta_report_as_read(domain)
    else:
        age = datetime.datetime.now() - last_read
        if age > datetime.timedelta(seconds=PERIOD*UNREGISTER):
            print('Unregistering (not read > 7 days) {}'.format(domain))
            repository.unregister_domain(domain)
            return

    # Skip domains that have been recently updated
    delta_last_updated = repository.delta_report_updated(domain)
    if delta_last_updated is not None:
        age = datetime.datetime.now() - delta_last_updated
        if age < datetime.timedelta(seconds=PERIOD):
            print('Skipping (recently updated) {}'.format(domain))
            return

    start = time.time()

    existing_report = repository.get_resolution_report(domain)
    if existing_report is None:
        existing_report = {}

    # Resolve every fuzzy variant, skipping the first entry.
    # NOTE(review): presumably the first entry is the domain itself - confirm.
    new_report = {}
    for entry in tools.analyse(domain)[1]['fuzzy_domains'][1:]:
        ip, error = tools.resolve(entry['domain-name'])
        # ``not ip`` already covers None, False and ''.
        if error or not ip:
            continue
        new_report[entry['domain-name']] = {
            'ip': ip,
            'tweak': entry['fuzzer'],
        }

    repository.update_resolution_report(domain, new_report)

    delta_report = {'new': [], 'updated': [], 'deleted': []}
    for (dom, data) in new_report.items():
        # Entries of new_report were built just above, so always dicts.
        new_ip = data['ip']

        # Direct dict membership: ``in d.keys()`` scans a list on Python 2.
        if dom not in existing_report:
            delta_report['new'].append((dom, new_ip))
            continue

        try:
            existing_ip = existing_report[dom]['ip']
        except TypeError:
            # handle old-style ip-only reports
            existing_ip = existing_report[dom]

        if new_ip != existing_ip:
            delta_report['updated'].append(
                (dom, existing_ip, new_ip)
            )

    # Anything that resolved previously but no longer does.
    delta_report['deleted'] = [
        dom for dom in existing_report if dom not in new_report
    ]

    repository.update_delta_report(domain, delta_report)

    print('Updated deltas for {} in {} seconds'.format(
        domain, time.time() - start
    ))