def a8():
    """Print the pairwise overlaps between three domain collections.

    The collections are the result of ``init_from_alexa(3001, 8000)``, the
    set passed to ``init_domain_set`` with "tmp.txt", and the set passed to
    ``init_domain_set`` with "domain_whitelist.txt".
    """
    # NOTE(review): init_domain_set presumably fills the given set from the
    # named file -- confirm against its definition.
    alexa_domains = init_from_alexa(3001, 8000)

    tmp_domains = set()
    init_domain_set("tmp.txt", tmp_domains)
    print(tmp_domains & alexa_domains)

    whitelist_domains = set()
    init_domain_set("domain_whitelist.txt", whitelist_domains)
    print(whitelist_domains & alexa_domains)
    print(whitelist_domains & tmp_domains)
def a():
    """Print running size statistics while merging several host lists.

    For every hosts file the size of the merged set is printed first, then
    the file is loaded through ``init_domain_set`` and a line with the size
    of that file's set, the new merged size and the file name is printed.
    The same is done last for the set handed to ``init_from_phishtank``.
    """
    host_files = (
        "hosts_badzeus.txt",
        "hosts_spyeye.txt",
        "hosts_palevo.txt",
        "hosts_feodo.txt",
        "hosts_cybercrime.txt",
        "hosts_malwaredomains.txt",
        "hosts_malwaredomainlist.txt",
        "hosts_hphosts.txt",
    )
    merged = set()

    for host_file in host_files:
        print(len(merged))
        current = set()
        init_domain_set(host_file, current)
        merged.update(current)
        print("%d %d %s" % (len(current), len(merged), host_file))

    print(len(merged))
    current = set()
    init_from_phishtank(current)
    merged.update(current)
    print("%d %d %s" % (len(current), len(merged), "hosts_phishtank.csv"))
def a13():
    """Scan dated DNS log files for domains present in the badzeus host set.

    The log directory is read from option ``dns_log_path`` in section
    ``One`` of "config.ini". The tree below it is walked and every file
    whose name is a ``YYYY-MM-DD`` date from 2014-02-01 (inclusive) to
    2014-03-01 (exclusive) is processed. Each processed file name is
    printed, followed by one "<file name> <line>" entry per matching line.

    A line is considered only when splitting it on "||" gives nine fields
    and field 5 equals 'A'. Field 4, without its last character and
    lower-cased, is reduced with ``tldextract`` to "<domain>.<suffix>" (or
    to the suffix alone when the domain part is empty) and looked up in the
    set passed to ``init_domain_set``.
    """
    import os
    import re
    from datetime import datetime
    import tldextract
    import ConfigParser

    config = ConfigParser.ConfigParser()
    config.read("config.ini")
    log_root = config.get("One", "dns_log_path")

    date_name = re.compile(r"^\d{4}\-\d{2}\-\d{2}$")
    first_day = datetime(2014, 2, 1).date()
    end_day = datetime(2014, 3, 1).date()

    # NOTE(review): init_domain_set presumably fills tmp_set from the given
    # file -- confirm against its definition.
    tmp_set = set()
    init_domain_set("../resources/hosts_badzeus.txt", tmp_set)

    # Remember the full path of every selected file. The walk variable must
    # not reuse the name of the configured root, otherwise files would later
    # be opened relative to whichever directory happened to be visited last.
    log_files = []
    for dir_path, _dirs, files in os.walk(log_root):
        for name in sorted(files):
            if not date_name.match(name):
                continue
            this_date = datetime.strptime(name, "%Y-%m-%d").date()
            if first_day <= this_date < end_day:
                # os.path.join also copes with a root lacking a trailing
                # separator, which plain string concatenation did not.
                log_files.append((name, os.path.join(dir_path, name)))

    for file_name, file_path in log_files:
        print(file_name)
        # The context manager closes each log file as soon as it is read.
        with open(file_path) as log:
            for line in log:
                line_array = line.split("||")
                if len(line_array) != 9 or line_array[5] != 'A':
                    continue
                # Drop the final character of the queried name
                # (presumably the trailing dot of an FQDN -- confirm).
                domain = line_array[4][:-1].lower()
                ext = tldextract.extract(domain)
                if ext.domain == "":
                    domain = ext.suffix
                else:
                    # Named attributes instead of positional slicing of the
                    # extraction result.
                    domain = ".".join((ext.domain, ext.suffix))
                if domain in tmp_set:
                    print("%s %s" % (file_name, line))
def a9():
    """Print stored records for every entry loaded from "fp.txt".

    For each element of the set handed to ``init_domain_set``, the result of
    ``find_one({"_id": element})`` is printed, first for
    ``client["p140414"]["nor_domain"]`` and then for
    ``client["p140414"]["nor_domain_matrix"]``.
    """
    # NOTE(review): `client` is defined outside this function; it looks like
    # a MongoDB client handle -- confirm.
    domains = set()
    init_domain_set("fp.txt", domains)

    for domain in domains:
        for collection in ("nor_domain", "nor_domain_matrix"):
            print(client["p140414"][collection].find_one({"_id": domain}))