def fetch_domains(): """ To fetch urls from the database, and return urls """ tld = {} db1 = MySQL(DBCONFIG1) db2 = MySQL(DBCONFIG2) sql = 'SELECT SUBSTRING_INDEX(tld,".",-1) as a,count(*) FROM domain_features GROUP BY a' db1.query(sql) tld_count = db1.fetch_all_rows() for i in tld_count: print i if len(i[0]) == 0: continue sql = 'SELECT type FROM tld_details WHERE tld="%s"' % ('.' + i[0]) db2.query(sql) urls2 = db2.fetch_all_rows() try: if urls2[0][0] in tld.keys(): tld[urls2[0][0]] += i[1] else: tld[urls2[0][0]] = i[1] except: continue print tld db1.close() db2.close()
def master_control(): """主线程控制""" try: db = MySQL(SOURCE_CONFIG) except: print "连接数据库失败" queue.task_done() return while queue.qsize(): # 重要,以前为1,导致线程不结束 check_domain, visit_times = queue.get() check_domain = str(check_domain.strip()) print '线程数量:', threading.activeCount(), ',任务数量:', queue.qsize() print "域名:", check_domain, datetime.now().strftime("%Y-%m-%d %H:%M:%S") tb_name = domain2tb(check_domain) if not tb_name: # 是否转换成功 print "域名名称异常,无法转换为表名称:", check_domain queue.task_done() continue rc_ttl = manage_rc_ttl(check_domain) rc_ttl = rc2str(rc_ttl) # 转换为字符串 if visit_times == 0: # 第一次探测域名直接插入到数据库中 insert_rc_db(db, tb_name, check_domain, rc_ttl) else: update_data(db, tb_name, check_domain, rc_ttl) queue.task_done() # time.sleep(1) # 去掉偶尔会出现错误 db.close() # 关闭数据库
def fetch_mal_domains(): """获取待查询的域名列表""" db = MySQL(SOURCE_CONFIG) sql = 'SELECT domain,visit_times FROM domain_records LIMIT 0,71622' db.query(sql) query_domains = db.fetch_all_rows() # 得到总共的数量 db.close() return query_domains
def fetch_domains(): """ To fetch urls from the database, and return urls """ db = MySQL() sql = 'SELECT domain FROM domain_white_list' db.query(sql) urls = db.fetch_all_rows() return tuple_to_list(urls)
def obtain_monitoring_host_process(): """获取待监测的进程信息""" db = MySQL(SOURCE_CONFIG) # 获取超过间隔时间的进程 sql = fetch_sql db.query(sql) host_process_result = db.fetch_all_rows() host_process = group_host_process(host_process_result) db.close() return host_process
def get_data_from_source(tb_name): """ :param name:string 数据库名称 :return: results 数据库whois数量分布 """ source_db = MySQL(SOURCE_CONFIG) source_db.query('select tld,flag,count(*) as whois_sum from domain_whois_%s group by tld,flag' % (tb_name)) results = source_db.fetch_all_rows() source_db.close() return results
def fetch_urls(): """ 获取要提取词汇特征的网址 """ db = MySQL() sql = 'SELECT url FROM url_features' db.query(sql) urls = db.fetch_all_rows() db.close() return tuple_to_list(urls)
def fetch_source_data(tb_name): """获取源数据 :param tb_name: string 表名 :return: results 数据 """ db = MySQL(DESTINATION_CONFIG) db.query('SELECT tld,flag,whois_sum FROM domain_whois_%s WHERE flag <> "-6" GROUP BY tld,flag' % tb_name) results = db.fetch_all_rows() db.close() return results
def fetch_resource_data(tb_name): """获得源数据 :param tb_name: string 表名 :return: results: 查询结果 """ db = MySQL(DESTINATION_CONFIG) db.query('SELECT tld, SUM(whois_sum) FROM domain_whois_%s GROUP BY tld' % tb_name) results = db.fetch_all_rows() db.close() return results
def fetch_data(): """ 从数据库中获取数据基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT url_length,path_tokens,path_brand,domain_tokens,malicious,domain_characters,path_characters FROM url_features_copy_copy' db.query(sql) urls = db.fetch_all_rows() db.close() return urls
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT tld FROM domain_features WHERE tld_token_count = "2"' db.query(sql) tlds = db.fetch_all_rows() db.close() return tlds
def fetch_data(): """ 从数据库中获取域名(domain)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT domain FROM domain_features limit 1000' db.query(sql) domains = db.fetch_all_rows() db.close() return domains
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT domain_length FROM domain_features' db.query(sql) domains_length = db.fetch_all_rows() db.close() return domains_length
def fetch_mal_domains(): """ 获取待查询的域名列表 """ db = MySQL(SOURCE_CONFIG) sql = 'SELECT domain, malicious_type FROM domain_index LIMIT 143244,71622' db.query(sql) query_domains = db.fetch_all_rows() # 得到总共的数量 db.close() return query_domains
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT domain_characters,domain_digit FROM domain_features' db.query(sql) tlds = db.fetch_all_rows() db.close() return tlds
def update_db(): """更新数据库""" tld_num = sum_all_domains() db = MySQL(DESTINATION_CONFIG) db.truncate('TRUNCATE TABLE domain_summary') update_sql = 'INSERT INTO domain_update(tld_name, domain_num) VALUES("%s", "%s")' summary_sql = 'INSERT INTO domain_summary(tld_name, domain_num) VALUES("%s", "%s")' for tld,num in tld_num: db.insert_no_commit(update_sql % (tld,num)) db.insert_no_commit(summary_sql % (tld,num)) db.commit() db.close()
def insert_db(): url_set = pd.read_csv('csv/kt-content.csv') db = MySQL() for i in range(len(url_set)): print url_set.ix[i].values sql = 'Insert ignore into benign_urls(url,type)VALUES ("%s","%s")' % ( url_set.ix[i].values[1], url_set.ix[i].values[2]) db.insert_no_commit(sql) db.commit() db.close()
def test(results): db = MySQL() for url_feature in results: print url_feature sql = 'UPDATE url_features SET url_length = "%s", domain="%s",domain_tokens="%s",domain_characters="%s",path="%s", ' \ 'path_tokens="%s",path_characters="%s",path_brand="%s" WHERE url="%s"' % \ (url_feature[1],url_feature[2],url_feature[3],url_feature[4],url_feature[5],url_feature[6], url_feature[7],url_feature[8],url_feature[0]) db.no_update(sql) db.commit() db.close()
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT tld FROM domain_features' sql = 'SELECT SUBSTRING_INDEX(tld,".",-1) as a FROM domain_features' db.query(sql) domains_length = db.fetch_all_rows() db.close() return domains_length
def get_resource_data(tb_name): """ 获得基础数据 :param tb_name:string 表名 :return: 返回基础数据 """ db = MySQL(DESTINATION_CONFIG) db.query( 'SELECT tld, SUM(whois_sum) AS count FROM domain_whois_%s WHERE flag <> "-6" GROUP BY tld' % tb_name) results = db.fetch_all_rows() return results
def update_day(): """更新表whois_sum_by_day,即统计数据库中最新的域名whois总量 """ print str(datetime.now()), '开始更新当天数据库中最新的WHOIS总量(whois_sum_by_day)' db = MySQL(DESTINATION_CONFIG) sql = 'INSERT INTO whois_sum_by_day(sum) SELECT MAX(tld_sum) FROM whois_sum \ WHERE to_days(insert_time) = to_days(now())' db.insert(sql) db.close() print str(datetime.now()), '结束更新当天数据库中最新的WHOIS总量(whois_sum_by_day)'
def update_destination(tb_name, results): """ 更新数据库 :param results: :return: """ destination_db = MySQL(DESTINATION_CONFIG) destination_db.truncate('TRUNCATE TABLE domain_whois_%s' % tb_name) for item in results: destination_db.insert_no_commit('INSERT INTO domain_whois_%s (tld,flag,whois_sum) VALUES ("%s","%s","%s")' % ( tb_name, item[0], item[1], item[2])) destination_db.commit() destination_db.close()
def update_top_sec_num(): """ 更新表msvr_ssvr,统计顶级服务器和二级服务器数量 """ print str(datetime.now()), '开始更新数据库中顶级和二级服务器数量(msvr_ssvr)' db = MySQL(DESTINATION_CONFIG) sql = 'UPDATE msvr_ssvr SET msvr_ssvr.ssvr=(SELECT COUNT(DISTINCT sec_svr) FROM top_sec_svr)' db.update(sql) sql = 'UPDATE msvr_ssvr SET msvr=(SELECT COUNT(DISTINCT addr) FROM whois_addr)' db.update(sql) db.close() print str(datetime.now()), '结束更新数据库中顶级和二级服务器数量(msvr_ssvr)'
def fetch_resource_data(): """ 获得待查询whois信息的域名,包括域名名称、更新时间、到期时间和详细信息,顶级域名 注意: domain_whois表中的域名是由domain_index中根据触发器更新的 """ db = MySQL(SOURCE_CONFIG) sql = 'SELECT domain,update_date,expiration_date,LENGTH (details),tld,flag FROM domain_whois WHERE flag = 1 LIMIT 0,80000' db.query(sql) query_domains = db.fetch_all_rows() # 得到总共的数量 db.close() return query_domains
def update_url_features(url_feature): """ 更新数据库中词汇特征 :param url_feature: :return: """ db = MySQL() sql = 'UPDATE url_features SET url_length = "%s", domain="%s",domain_tokens="%s",domain_characters="%s",path="%s", ' \ 'path_tokens="%s",path_characters="%s",path_brand="%s" WHERE url="%s"' % \ (url_feature[1],url_feature[2],url_feature[3],url_feature[4],url_feature[5],url_feature[6], url_feature[7],url_feature[8],url_feature[0]) db.update(sql) db.close()
def update_db(): """更新数据库""" tld_whois = sum_all_whois() total = 0 db = MySQL(DESTINATION_CONFIG) sql = 'INSERT INTO tld_whois_sum_history(tld,whois_sum) VALUES("%s", "%s")' # 插入数据 for tld, whois in tld_whois: db.insert_no_commit(sql % (tld, whois)) total += whois db.commit() sql = 'INSERT INTO whois_sum(tld_sum) VALUES("%s")' db.insert(sql % total) db.close()
def insert_db(): file_name = open('small_benign.txt') urls = file_name.readlines() db = MySQL() count = 0 for url in urls: sql = 'Insert into url_features(url,malicious)VALUES ("%s","0")' % ( url.strip()) db.insert_no_commit(sql) count += 1 db.commit() db.close() print count
def process_id_to_user(process_id): """通过process_id获取用户的邮箱地址和主机地址""" sql = "SELECT email,host_ip,pid,code_route,process_name,warning_times FROM `process_info`,user_account,host_info WHERE process_id = '%s' AND user_account.user_id = process_info.user_id AND host_info.host_id = process_info.host_id" db = MySQL(SOURCE_CONFIG) db.query(sql % process_id) email_host = db.fetch_all_rows() email = email_host[0][0] host = base64.decodestring(email_host[0][1]) pid = email_host[0][2] code_route = email_host[0][3] process_name = email_host[0][4] warning_times = email_host[0][5] db.close() return email, host,pid,code_route,process_name,warning_times
def insert_domains(domain_set): """ 更新数据库中词汇特征 :param url_feature: :return: """ db = MySQL() for domain in domain_set: sql = 'Insert into domain_features(domain,tld,tld_token_count,domain_length,domain_characters,domain_digit)VALUES ("%s","%s","%s","%s","%s","%s")' \ %(domain['domain'],domain['tld'],domain['tld_token_count'],domain['domain_length'],domain['character'],domain['digit']) db.insert_no_commit(sql) db.commit() db.close()
def connect_host(host,process_ids): """连接主机""" db = MySQL(SOURCE_CONFIG) host_error = False hostname, port, username, password = host client = paramiko.SSHClient() # 创建客户端 client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) hostname = base64.decodestring(hostname).strip() username = base64.decodestring(username).strip() password = base64.decodestring(password).strip() port = int(base64.decodestring(port).strip()) try: client.connect(hostname, port, username, password, timeout=3) except paramiko.BadHostKeyException, e: host_error = e