def callback(message, context):
    """Callback handler for certstream events.

    Heartbeats and every message type other than ``certificate_update`` are
    ignored. For a certificate update, each domain on the leaf certificate
    that is not whitelisted is scored against every entry of ``watchlist``
    and the result is handed to ``handle_score_and_log``.

    :param message: decoded certstream event (nested dict)
    :param context: certstream callback context; not used here
    :return: None
    """
    if message['message_type'] != "certificate_update":
        return

    data = message['data']
    all_domains = data['leaf_cert']['all_domains']

    # The issuing CA belongs to the certificate, not to a particular
    # domain pair, so the free-CA check is done once per message. A missing
    # or empty chain counts as "not Let's Encrypt" instead of aborting the
    # whole message.
    try:
        free_ca = "Let's Encrypt" in data['chain'][0]['subject']['aggregated']
    except (KeyError, IndexError, TypeError):
        free_ca = False
    free_ca_bonus = 20 if free_ca else 0

    # Loop through all of the domains found in the cert
    for domain in all_domains:
        # Is the domain whitelisted
        if is_whitelisted(domain, whitelisted_domains):
            continue

        lowered = domain.lower()
        cleaned = clean_domain(domain)

        # Loop through each of the domains that we're watching
        for watch_domain, keywords in watchlist.items():
            score = score_domain(lowered, watch_domain, keywords)
            # More suspicious if it's issued by a free CA
            score += free_ca_bonus
            handle_score_and_log(cleaned, clean_domain(watch_domain), score)
def main():
    """Add the domain from the parsed parameters to the links queue.

    Builds an ``argparse`` parser, lets ``get_params`` turn it into a
    parameter object, passes ``params.domain`` through ``clean_domain`` and
    stores the result as a new ``LinksQueueItem`` via ``session``, committing
    right away.
    """
    params = get_params(argparse.ArgumentParser())
    queue_item = LinksQueueItem(link=clean_domain(params.domain))
    session.add(queue_item)
    session.commit()
def get_sitemap_url(params):
    """Resolve the sitemap URL for the given parameters.

    When ``params.domain`` is set, the sitemap is looked up through the
    robots data fetched for that domain; otherwise ``params.url`` is returned
    unchanged.

    :param params: object exposing ``domain`` and ``url`` attributes
    :return: the sitemap URL, or None when either the robots data or the
        sitemap entry could not be obtained
    """
    # No domain supplied: the caller gave the URL directly.
    if not params.domain:
        return params.url

    robots = get_robots_by_domain(clean_domain(params.domain))
    if not robots:
        return None

    # Any falsy lookup result is reported as None.
    return get_sitemap_url_by_robots(robots) or None
def callback(message, context):
    """Callback handler for certstream events.

    Heartbeats and every message type other than ``certificate_update`` are
    ignored. For a certificate update, each domain on the leaf certificate
    that is not whitelisted is either sent to the external analysis service
    (when ``external_analysis`` is truthy) or scored locally against every
    entry of ``watchlist``; hits are handed to ``handle_score_and_log``.

    :param message: decoded certstream event (nested dict)
    :param context: certstream callback context; not used here
    :return: None
    :raises requests.RequestException: if the external analysis request
        fails or times out
    :raises ValueError: if the external service does not answer with JSON
    """
    if message['message_type'] != "certificate_update":
        return

    data = message['data']
    all_domains = data['leaf_cert']['all_domains']

    # The issuing CA belongs to the certificate, not to a particular
    # domain pair, so the free-CA check is done once per message. A missing
    # or empty chain counts as "not Let's Encrypt" instead of aborting the
    # whole message.
    try:
        free_ca = "Let's Encrypt" in data['chain'][0]['subject']['aggregated']
    except (KeyError, IndexError, TypeError):
        free_ca = False
    free_ca_bonus = 20 if free_ca else 0

    # Loop through all of the domains found in the cert
    for domain in all_domains:
        # Is the domain whitelisted
        if is_whitelisted(domain, whitelisted_domains):
            continue

        cleaned = clean_domain(domain)

        if external_analysis:
            # Let requests build the query string so the domain is
            # URL-encoded, and bound the wait so a stalled service cannot
            # block the stream callback forever.
            response = requests.get(
                external_analysis_url,
                params={'domain': cleaned},
                timeout=10,
            )
            # Decode the body once and reuse it.
            verdict = json.loads(response.text)
            print(verdict)
            if verdict['score'] > 0.5:
                handle_score_and_log(
                    cleaned,
                    clean_domain(verdict['target']),
                    verdict['similarity'])
            # NOTE(review): external analysis is treated as replacing the
            # local watchlist scoring for this domain - confirm that
            # low-scoring domains are not meant to fall through to it.
            continue

        lowered = domain.lower()

        # Loop through each of the domains that we're watching
        for watch_domain, keywords in watchlist.items():
            score = score_domain(lowered, watch_domain, keywords)
            # More suspicious if it's issued by a free CA
            score += free_ca_bonus
            handle_score_and_log(cleaned, clean_domain(watch_domain), score)
def score_domain(target_domain, watch_domain, keywords):
    '''
    Score the likelihood of the target domain being a phishing clone of the watch domain.

    :param target_domain: the domain of the newly registered certificate
    :param watch_domain: the domain being monitored
    :param keywords: keywords associated with the watch domain; currently
        unused because keyword scoring is disabled
    :return: the score of the domain in question: 0 when the target is the
        watch domain itself, 100 when it only differs by TLD or contains the
        watch domain, otherwise an accumulated heuristic score (halved, and
        therefore a float, for over-long targets)
    '''
    target_domain = clean_domain(target_domain)
    watch_domain = clean_domain(watch_domain)

    # If the target domain is the watch domain, don't score it
    if target_domain == watch_domain:
        return 0

    score = 0
    try:
        target_domain.encode('ascii')
    except UnicodeEncodeError:
        # Contains unicode, suspicious
        score += 20
        target_domain = unidecode(target_domain)

    # TLD-less forms are needed by nearly every check below; compute once.
    watch_base = remove_tld(watch_domain)
    target_base = remove_tld(target_domain)

    # If the parsed target domain is the parsed watch domain, but with a
    # different TLD, very suspicious
    if watch_base == target_base:
        return 100

    # If the watch domain is in the target domain, but they aren't equal,
    # suspicious
    if watch_domain in target_domain:
        return 100

    # If they have a low levenshtein distance, suspicious
    l_distance = Levenshtein.distance(watch_base, target_base)
    fuzz_ratio = fuzz.token_sort_ratio(watch_base, target_base)

    # The similarity score is added to, rather than assigned over, the
    # running total so the unicode penalty above is not discarded.
    if l_distance <= 2:
        # Works for both short and long strings
        score += 50 + 25 * (2 - l_distance)
    elif fuzz_ratio > 80:
        # Better with longer strings
        score += fuzz_ratio - 25

    # TODO: keyword functionality is temporarily disabled

    target_len = len(target_base)
    watch_len = len(watch_base)

    # If the target domain is much shorter than the watch domain, it's
    # probably not much of a threat
    if target_len > watch_len / 2 and target_len > 4:
        # Detect the presence of the watch domain in the target domain
        score += fuzzy_scorer_domain(watch_base, target_base)

    # Detect suspicious domain structure
    # NOTE(review): these structural checks run for every candidate, not only
    # those that pass the length guard above - confirm the intended scope.

    # Remove initial '*.' for wildcard certificates
    if target_domain.startswith('*.'):
        target_domain = target_domain[2:]
        target_base = remove_tld(target_domain)

    # Detect fake TLD (e.g. *.com-account-management.info)
    if any(fake_tld in target_base for fake_tld in ('com', 'net', 'org', 'io')):
        score += 20

    # Detect unreliable TLDs
    if target_domain.endswith(tuple(bad_repuation_tlds)):
        score += 20

    # If the longest '.'/'-' separated segment is far longer than the watch
    # domain, it cannot realistically look like it. str.split always yields
    # at least one element, so max() is safe here.
    longest_segment = max(
        len(segment) for segment in target_base.replace("-", ".").split("."))
    if longest_segment > watch_len * 1.5:
        score /= 2

    return score