Exemple #1
0
def callback(message, context):
    """Callback handler for certstream events.

    Only ``certificate_update`` messages are processed; heartbeats and any
    other message type are ignored. Every non-whitelisted domain listed in
    the certificate is scored against each watched domain and the result is
    passed to ``handle_score_and_log``.

    :param message: certstream event dict. ``message_type`` is always read;
        for certificate updates ``data.leaf_cert.all_domains`` and
        ``data.chain[0].subject.aggregated`` are read as well.
    :param context: not used by this handler.
    """
    if message['message_type'] != "certificate_update":
        return

    # The issuer is a property of the certificate, not of the individual
    # domain, so work it out once per message instead of once per
    # (domain, watch_domain) pair. A certificate delivered without any chain
    # entry simply gets no bonus instead of aborting the callback.
    try:
        issuer = message['data']['chain'][0]['subject']['aggregated']
    except (KeyError, IndexError):
        issuer = ""
    # More suspicious if it's issued by a free CA
    free_ca_bonus = 20 if "Let's Encrypt" in issuer else 0

    # Loop through all of the domains found in the cert
    for domain in message['data']['leaf_cert']['all_domains']:

        # Is the domain whitelisted
        if is_whitelisted(domain, whitelisted_domains):
            continue

        lowered_domain = domain.lower()
        cleaned_domain = clean_domain(domain)

        # Loop through each of the domains that we're watching
        for watch_domain, keywords in watchlist.items():
            score = score_domain(lowered_domain, watch_domain, keywords)
            handle_score_and_log(cleaned_domain,
                                 clean_domain(watch_domain),
                                 score + free_ca_bonus)
Exemple #2
0
def main():
    """Read the command-line parameters and store the cleaned domain.

    The parameters come from ``get_params``; the cleaned ``domain`` value is
    wrapped in a ``LinksQueueItem`` which is added to the session and
    committed.
    """
    args = get_params(argparse.ArgumentParser())

    queue_item = LinksQueueItem(link=clean_domain(args.domain))

    session.add(queue_item)
    session.commit()
Exemple #3
0
def get_sitemap_url(params):
    """Return the sitemap URL described by ``params``.

    When ``params.domain`` is set, the robots data for the cleaned domain is
    fetched with ``get_robots_by_domain`` and the sitemap URL is derived from
    it with ``get_sitemap_url_by_robots``. Otherwise ``params.url`` is
    returned unchanged.

    :param params: object exposing ``domain`` and ``url`` attributes
    :return: the sitemap URL, or ``None`` when the robots data or the
        sitemap URL derived from it is missing/empty
    """
    # No domain given: the caller supplied the sitemap URL directly.
    if not params.domain:
        return params.url

    robots_data = get_robots_by_domain(clean_domain(params.domain))
    if not robots_data:
        return None

    # Any falsy result is normalised to None.
    return get_sitemap_url_by_robots(robots_data) or None
Exemple #4
0
def callback(message, context):
    """Callback handler for certstream events.

    Only ``certificate_update`` messages are processed; heartbeats and any
    other message type are ignored. Whitelisted domains are skipped. For the
    remaining domains one of two paths is taken:

    * ``external_analysis`` enabled: the cleaned domain is sent to
      ``external_analysis_url`` and, when the returned ``score`` exceeds
      0.5, the returned ``target`` and ``similarity`` are logged.
    * otherwise: the domain is scored locally against every watched domain.

    :param message: certstream event dict. ``message_type`` is always read;
        for certificate updates ``data.leaf_cert.all_domains`` and
        ``data.chain[0].subject.aggregated`` are read as well.
    :param context: not used by this handler.
    """
    if message['message_type'] != "certificate_update":
        return

    # The issuer is a property of the certificate, not of the individual
    # domain, so work it out once per message. A certificate delivered
    # without any chain entry simply gets no bonus instead of aborting the
    # callback.
    try:
        issuer = message['data']['chain'][0]['subject']['aggregated']
    except (KeyError, IndexError):
        issuer = ""
    # More suspicious if it's issued by a free CA
    free_ca_bonus = 20 if "Let's Encrypt" in issuer else 0

    # Loop through all of the domains found in the cert
    for domain in message['data']['leaf_cert']['all_domains']:

        # Is the domain whitelisted
        if is_whitelisted(domain, whitelisted_domains):
            continue

        cleaned_domain = clean_domain(domain)

        if external_analysis:
            # The domain comes from an untrusted certificate, so let
            # requests URL-encode it rather than formatting it into the
            # query string by hand.
            # NOTE(review): no timeout is passed, so a stalled analysis
            # service blocks the handler - consider adding one.
            response = requests.get(external_analysis_url,
                                    params={"domain": cleaned_domain})
            # Parse the body once and reuse the result.
            verdict = json.loads(response.text)
            print(verdict)
            if verdict['score'] > 0.5:
                handle_score_and_log(cleaned_domain,
                                     clean_domain(verdict['target']),
                                     verdict['similarity'])
            continue

        lowered_domain = domain.lower()

        # Loop through each of the domains that we're watching
        for watch_domain, keywords in watchlist.items():
            score = score_domain(lowered_domain, watch_domain, keywords)
            handle_score_and_log(cleaned_domain,
                                 clean_domain(watch_domain),
                                 score + free_ca_bonus)
Exemple #5
0
def score_domain(target_domain, watch_domain, keywords):
    '''
    Score the likelihood of the target domain being a phishing clone of the watch domain.

    :param target_domain: the domain of the newly registered certificate
    :param watch_domain: the domain being monitored
    :param keywords: keywords associated with the watch domain; currently
        unused because keyword scoring is temporarily disabled
    :return: the score of the domain in question. 0 when both domains are
        identical after cleaning, 100 when the target only differs by TLD or
        contains the watch domain, otherwise an accumulated heuristic score
        (a float if the final halving step was applied).
    '''

    score = 0

    target_domain = clean_domain(target_domain)
    watch_domain = clean_domain(watch_domain)

    # If the target domain is the watch domain, don't score it
    if target_domain == watch_domain:
        return 0

    try:
        target_domain.encode('ascii')
    except UnicodeError:
        # Contains unicode, suspicious
        score += 20

    target_domain = unidecode(target_domain)

    # The TLD-less names are compared several times below; compute them once.
    watch_name = remove_tld(watch_domain)
    target_name = remove_tld(target_domain)

    # If the parsed target domain is the parsed watch domain, but with a different TLD, very suspicious
    if watch_name == target_name:
        return 100

    # If the watch domain is in the target domain, but they aren't equal, suspicious
    if watch_domain in target_domain:
        return 100

    # If they have a low levenshtein distance, suspicious
    l_distance = Levenshtein.distance(watch_name, target_name)
    fuzz_ratio = fuzz.token_sort_ratio(watch_name, target_name)

    # Accumulate rather than assign so the non-ASCII bonus added above is
    # not silently discarded.
    # Works for both short and long strings
    if l_distance <= 2:
        score += 50 + 25 * (2 - l_distance)
    # Better with longer strings
    elif fuzz_ratio > 80:
        score += fuzz_ratio - 25

    # TODO: keyword functionality is temporarily disabled
    # score += fuzzy_scorer_keywords(keywords, remove_tld(target_domain))

    target_len = len(target_name)
    watch_len = len(watch_name)

    # If the target domain is much shorter than the watch domain, it's probably not much of a threat
    if target_len > watch_len / 2 and target_len > 4:
        # Detect the presence of the watch domain in the target domain
        score += fuzzy_scorer_domain(watch_name, target_name)

    # Detect suspicious domain structure
    # Remove initial '*.' for wildcard certificates
    if target_domain.startswith('*.'):
        target_domain = target_domain[2:]
        # The domain changed, so its TLD-less name has to be refreshed.
        target_name = remove_tld(target_domain)

        # Detect fake TLD (e.g. *.com-account-management.info)
        if any(fake_tld in target_name
               for fake_tld in ('com', 'net', 'org', 'io')):
            score += 20

    # Detect unreliable TLDs
    if any(target_domain.endswith(bad_tld) for bad_tld in bad_repuation_tlds):
        score += 20

    # Split the name on '.' and '-' and look at its longest segment: if even
    # that is much longer than the watch name, the target is too long to
    # realistically look like the watch domain, so halve the score.
    # str.split always yields at least one element, so max() cannot fail here.
    longest_segment = max(
        len(segment) for segment in target_name.replace("-", ".").split("."))

    if longest_segment > len(watch_name) * 1.5:
        score /= 2

    return score