def update_top_sec_num(): """ 更新表msvr_ssvr,统计顶级服务器和二级服务器数量 """ print str(datetime.now()), '开始更新数据库中顶级和二级服务器数量(msvr_ssvr)' db = MySQL(DESTINATION_CONFIG) sql = 'UPDATE msvr_ssvr SET msvr_ssvr.ssvr=(SELECT COUNT(DISTINCT sec_svr) FROM top_sec_svr)' db.update(sql) sql = 'UPDATE msvr_ssvr SET msvr=(SELECT COUNT(DISTINCT addr) FROM whois_addr)' db.update(sql) db.close() print str(datetime.now()), '结束更新数据库中顶级和二级服务器数量(msvr_ssvr)'
def update_url_features(url_feature): """ 更新数据库中词汇特征 :param url_feature: :return: """ db = MySQL() sql = 'UPDATE url_features SET url_length = "%s", domain="%s",domain_tokens="%s",domain_characters="%s",path="%s", ' \ 'path_tokens="%s",path_characters="%s",path_brand="%s" WHERE url="%s"' % \ (url_feature[1],url_feature[2],url_feature[3],url_feature[4],url_feature[5],url_feature[6], url_feature[7],url_feature[8],url_feature[0]) db.update(sql) db.close()
def delete_empty_rc(domain): """ 删除空白域名DNS记录,但弱最后一条为空白,则不删除 方法:1)获取最新记录的last_updated时间 2)删除除最新last_updated所有域名的时间 """ db = MySQL(SOURCE_CONFIG) tb_name = domain2tb(domain) # 获取最近一条记录的last_updated时间 max_time_sql = 'select max(last_updated) from ' + tb_name + ' WHERE domain= "%s" ' db.query(max_time_sql % domain) max_time = db.fetch_all_rows()[0][0] # 得到最大值 # 删除某次探测为空的数据,但不删除最后一条记录 delete_sql = 'DELETE FROM ' + tb_name + ' WHERE (last_updated = insert_time) AND ips = "" AND domain="%s" AND last_updated != "%s"' db.update(delete_sql % (domain, str(max_time))) db.close()
def merge_same_rc(domain): """ 最简单的合并相同的记录 todo: 1. 以某时间窗口来进行合并,去掉因为探测导致的问题 """ domain_rc = [] rc_last_updated = [] db = MySQL(SOURCE_CONFIG) tb_name = domain2tb(domain) sql = 'select ips, cnames, ns,last_updated from ' + tb_name + ' WHERE domain="%s"' db.query(sql % domain) rc = db.fetch_all_rows() # 域名记录预处理 for i in rc: domain_rc.append([i[0], i[1], i[2]]) rc_last_updated.append(str(i[3])) # 遍历修改记录 for i in range(0, len(domain_rc) - 1): if sorted(domain_rc[i][0]) == sorted(domain_rc[i+1][0]) and sorted(domain_rc[i][1])==sorted(domain_rc[i+1][1]) \ and sorted(domain_rc[i][2])==sorted(domain_rc[i+1][2]): # 前后两次的记录是否一致 ## 若相邻一致的情况 # 先删除后者记录 delete_sql = 'delete from ' + tb_name + ' WHERE domain="%s" and last_updated="%s"' db.update(delete_sql % (domain, rc_last_updated[i + 1])) # 再将前者记录的last_updated时间修改为后者记录的时间 update_sql = 'update ' + tb_name + ' set last_updated="%s" WHERE domain="%s" AND last_updated="%s"' db.update(update_sql % (rc_last_updated[i + 1], domain, rc_last_updated[i])) else: pass db.close()
def reduce_warning_times(process_id): db = MySQL(SOURCE_CONFIG) sql = 'update process_info set warning_times = warning_times -1 where process_id = "%s"' % process_id db.update(sql) db.close()