def match(cls, domain):
    """Return True if *domain* — or its registrable (private-suffix) form —
    matches any regex pattern in ``cls.RegexList``.

    Args:
        domain: hostname string to test.

    Returns:
        bool: True on the first pattern that matches either the full domain
        or its private suffix, else False.
    """
    # NOTE(review): PublicSuffixList construction is expensive; consider
    # caching a single instance on the class if this is called in a loop.
    psl = publicsuffixlist.PublicSuffixList()
    private = psl.privatesuffix(domain)
    if private is None:  # was `== None` — identity check is the idiom for None
        private = ""
    # any() replaces the manual flag/break loop; short-circuits identically.
    return any(
        re.match(pattern, domain) is not None or re.match(pattern, private) is not None
        for pattern in cls.RegexList
    )
import hues
from plugin_system import Plugin
import publicsuffixlist
import pickle
import os

# Shared public-suffix list instance (construction is slow, so do it once).
psl = publicsuffixlist.PublicSuffixList()

# Plugin registration: name and per-command usage lines are user-facing
# Russian strings and must stay exactly as written.
plugin = Plugin('Послать сообщение',
                usage='''напиши [id] [сообщение] - написать сообщение пользователю
скрыть [id] - не получать сообщения от пользователя
показать [id] - получать сообщения от пользователя
небеспокоить - не получать сообщения вообще
беспокоить - получать сообщения от пользователей, не в вашем чёрном списке'''.split("\n"))

# Persistent per-user state, loaded from pickles below.
# black_list: presumably {user_id: blocked ids} — TODO confirm against the handlers.
black_list = {}
# muted: presumably users who disabled all incoming messages — TODO confirm.
muted = {}

# Ensure the data directory exists before attempting to read/write state.
if not os.path.exists("plugins/msg_sender_data"):
    os.makedirs("plugins/msg_sender_data")

# Best-effort load of saved state; a missing/corrupt file just leaves the
# defaults in place. NOTE(review): bare `except:` swallows everything —
# narrowing to (OSError, pickle.PickleError) would be safer.
try:
    with open('plugins/msg_sender_data/bl.pkl', 'rb') as f:
        black_list = pickle.load(f)
except:
    pass
try:
    with open('plugins/msg_sender_data/m.pkl', 'rb') as f:
        muted = pickle.load(f)
    # Fragment of an enclosing function whose header is outside this view.
    # Compares an expected data level against an actual one; "none" accepts
    # anything, "partial" accepts partial or full, "full" requires full.
    if expected_data == "none":
        return True
    elif expected_data == "partial" and actual_data in ["partial", "full"]:
        return True
    elif expected_data == actual_data == "full":
        return True
    return False


def get_tracker_domain(tracker):
    """Return the registrable (private-suffix) domain of a tracker URL's host."""
    url = urlparse(tracker)
    return get_tracker_domain.psl.privatesuffix(url.hostname)


# Takes significant time to instantiate (~100ms), so only do it once
get_tracker_domain.psl = publicsuffixlist.PublicSuffixList()


class TorrentProblems:
    # Path segments that are never valid inside a torrent file path.
    INVALID_PATH_SEGMENT = [b"", b".", b"..", b"/", b"\\"]
    # Bytes disallowed in path components (list continues beyond this view).
    # NOTE(review): b"\\" appears twice in this list — likely a duplicate.
    BAD_CHARACTER_SET = [
        b"\x00",
        b"<",
        b">",
        b":",
        b"\\",
        b'"',
        b"/",
        b"\\",
        b"|",
        b"?",
def getFeature(domain, nowdate):
    """Build an 11-element feature vector for *domain* from redis-stored DNS data.

    Feature layout:
        [0]    number of IPs the domain resolved to
        [1-3]  max/min/mean of per-IP total hosted-domain counts
        [4-6]  max/min/mean of per-IP domain counts seen within the last week
        [7-9]  max/min/mean of the dominant-private-suffix ratio per IP
        [10]   1 if the domain has at least one CNAME record, else 0

    Args:
        domain: domain key as stored in redis db=1 (domain -> IP set).
        nowdate: datetime.datetime reference point for the one-week window.

    Returns:
        numpy.ndarray of shape (11,).
    """
    redisDomainDB = redis.Redis(host='127.0.0.1', port=6379, db=1)  # domain -> IPs
    redisIPDB = redis.Redis(host='127.0.0.1', port=6379, db=2)      # IP -> {domain: timestamp}
    redisCNAMEDB = redis.Redis(host='127.0.0.1', port=6379, db=3)   # domain -> CNAMEs
    beforeWeekDate = nowdate - datetime.timedelta(days=7)
    psl = publicsuffixlist.PublicSuffixList(accept_unknown=False)
    vector = np.zeros(11)

    ipset = redisDomainDB.smembers(domain)
    alldays = []    # per-IP total domain counts
    weekdays = []   # per-IP counts of domains seen within the last week
    priratios = []  # per-IP ratio of the most common private suffix
    if ipset:  # smembers returns a (possibly empty) set, never None
        vector[0] = len(ipset)
        for ip in ipset:
            ip_domain_map = redisIPDB.hgetall(ip)
            ipAllNum = len(ip_domain_map)
            alldays.append(ipAllNum)
            pri_map = {}
            weeknum = 0
            for k, v in ip_domain_map.items():
                # redis returns raw bytes; decode directly instead of the
                # original str(b'...')-repr quote-slicing (same result for
                # ASCII payloads, and no dependence on repr formatting).
                vs = v.decode() if isinstance(v, bytes) else str(v)
                dns = k.decode() if isinstance(k, bytes) else str(k)
                domain_time = datetime.datetime.strptime(vs, '%Y%m%d%H%M%S')
                # Count domains last seen within the one-week window.
                if domain_time > beforeWeekDate:
                    weeknum += 1
                # Tally occurrences of each registrable (private) suffix.
                domain_pri = psl.privatesuffix(dns)
                pri_map[domain_pri] = pri_map.get(domain_pri, 0) + 1
            weekdays.append(weeknum)
            # Guard: an IP with no hashed domains would make max() raise and
            # divide by zero in the original; skip the ratio in that case.
            if ipAllNum > 0:
                priratios.append(max(pri_map.values()) / ipAllNum)

    if alldays:
        vector[1] = np.max(alldays)
        vector[2] = np.min(alldays)
        vector[3] = np.mean(alldays)
    if weekdays:
        vector[4] = np.max(weekdays)
        vector[5] = np.min(weekdays)
        vector[6] = np.mean(weekdays)
    if priratios:
        vector[7] = np.max(priratios)
        vector[8] = np.min(priratios)
        vector[9] = np.mean(priratios)

    cnameset = redisCNAMEDB.smembers(domain)
    # Bug fix: the original tested `is not None`, which is always true for
    # smembers (it returns an empty set) — the CNAME feature was stuck at 1.
    if cnameset:
        vector[10] = 1
    return vector
start_time = time.time()

# Build a trie of whitelisted domain names pulled from MySQL.
root = TrieNode('*')
# NOTE(review): hard-coded DB credentials — move to config or environment.
db = pymysql.connect(host='localhost', user='******', passwd='root', db="Bakalarka")
cursor = db.cursor()
cursor.execute("SELECT domain_name FROM Whitelist")
for (whitelisted,) in cursor.fetchall():
    add(root, whitelisted)
db.close()  # was never closed in the original

# Instantiate the public-suffix list ONCE: construction costs ~100ms, and the
# original rebuilt it for every input line — the dominant cost of the script.
psl = publicsuffixlist.PublicSuffixList()

# Stream queries, write out domains (field 9 of each line) that are neither
# known to the public-suffix list nor present in the whitelist trie.
# `with` replaces the original unclosed handles (also renamed from `file`,
# which shadowed the builtin).
with open('/Users/martinapivarnikova/Documents/anonymized_queries') as queries, \
     open('/Users/martinapivarnikova/Documents/trie.txt', 'w') as out:
    for line in queries:
        dn = line.split(" ")[8]
        domain_name = psl.subdomain(dn, 0)
        if domain_name is not None and trie(domain_name) is False:
            out.write(dn)
            out.write('\n')

print("--- %s seconds ---" % (time.time() - start_time))