def run(self):
    """
    Poll the server forever and download every upgrade file it announces.

    Polling starts at ``G_CONFIG.curver`` and moves on to the ``nextver``
    value of each response (keeping the current version when the response
    has none). Files are only processed when the response carries a truthy
    ``count``. Each file is handed to ``exec_func_times_if_error`` with
    ``times=10``; a falsy result is logged and the file is skipped.
    The loop pauses 10 seconds between polls.

    :return: never returns
    """
    # Downloading the full initial package first is currently disabled:
    # self.get_initial_resp()
    version = G_CONFIG.curver
    while True:
        resp = self.get_upgrade_resp({"curver": version, "limit": 100})
        has_files = resp.get("count", 0)
        version = resp.get("nextver", version)
        pending = resp.get("files", []) if has_files else []
        for entry in pending:
            downloaded = exec_func_times_if_error(
                self.download, url=entry["link"], file_name=entry["name"], times=10)
            if not downloaded:
                logger.error("尝试10次未下载成功")
        time.sleep(10)
def execute_many_sql_with_commit(self, param_list):
    """
    Batch upsert rows into ``self.table`` with one ``executemany`` call.

    Rows whose key already exists only get ``risk_score``, ``risk_tag`` and
    ``risk_level`` refreshed; every other column keeps its stored value.

    :param param_list: sequence of mappings keyed by column name (``ip``,
        ``type``, ``risk_tag``, ...), matching the named placeholders of the
        statement
    :return: 1 on success, 0 when ``param_list`` is empty or on any error
    """
    if not param_list:
        return 0
    conn = None
    cursor = None
    try:
        sql = (
            "INSERT INTO `{table}` "
            "(`ip`, `type`, `risk_tag`, `risk_score`, `risk_level`,`country`, `province`, `city`, `district`, "
            "`owner`,`latitude`,`longitude`,`adcode`,`areacode`,`continent`) "
            "VALUES(%(ip)s, %(type)s, %(risk_tag)s, %(risk_score)s, %(risk_level)s,%(country)s, %(province)s, "
            "%(city)s,%(district)s, %(owner)s, %(latitude)s,%(longitude)s,%(adcode)s,"
            "%(areacode)s,%(continent)s) ON DUPLICATE KEY UPDATE risk_score=VALUES(risk_score),"
            " risk_tag=VALUES(risk_tag),risk_level=VALUES(risk_level)"
        ).format(table=self.table)
        conn = self.pool.connection()
        cursor = conn.cursor()
        cursor.executemany(sql, param_list)
        conn.commit()
        return 1
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return 0
    finally:
        # Release the cursor and hand the connection back to the pool even
        # when the statement failed; previously both leaked on any error.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as close_err:
                logger.error(close_err)
def batch_insert(self, data_list):
    """
    Insert a batch of IP records through ``self.get_conn().insert_many``.

    The insert is unordered and bypasses document validation.

    :param data_list: list of dicts, each with the fields
        "ip": the IP address (str)
        "type": IP type (home broadband, data center, mobile network,
                enterprise line, campus/organisation, unknown)
        "risk_tag": risk tag (rapid re-dial, proxy, data center, none)
        "risk_score": risk score, 0-100; the higher the score, the more
                      likely the IP is held by malicious actors
        "risk_level": risk level (high, medium, low, none)
        "country": country
        "province": province
        "city": city
        "district": district / county
        "owner": network operator
        "latitude": latitude
        "longitude": longitude
        "adcode": administrative division code
        "areacode": country code
        "continent": continent
    :return: int (1: success, 0: failed or nothing to insert)
    """
    if data_list:
        try:
            collection = self.get_conn()
            collection.insert_many(
                data_list, ordered=False, bypass_document_validation=True)
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
        else:
            return 1
    return 0
def get_upgrade_resp(self, data):
    """
    Fetch the download-link information for upgrade packages.

    ``data`` is JSON-encoded, passed through ``aes_encrypt_seg`` and posted
    together with the configured ``snuser`` to the ``/api/v6/upgrade/``
    endpoint of ``G_CONFIG.host``. A response with status 200 has its
    ``data`` field passed through ``aes_decrypt_seg`` and JSON-decoded.

    :param data: JSON-serialisable request parameters
    :return: dict decoded from the response; ``{}`` on a non-200 status or
        on any error (which is logged)
    """
    url = "https://{host}/api/v6/upgrade/".format(host=G_CONFIG.host)
    payload = {
        "snuser": G_CONFIG.user["snuser"],
        "data": aes_encrypt_seg(json.dumps(data)),
    }
    try:
        # (connect, read) timeout: without it a stalled server would block
        # the caller indefinitely.
        r = requests.post(url, data=json.dumps(payload), timeout=(10, 60))
        rjson = json.loads(r.text)
        if rjson["status"] == 200:
            return json.loads(aes_decrypt_seg(rjson["data"]))
        # ``warning`` replaces the deprecated ``warn`` alias.
        logger.warning("error code is {}".format(rjson["status"]))
        return {}
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return {}
def run(self):
    """
    Poll the server forever and download every upgrade file it announces.

    Polling starts at ``G_CONFIG.curver``, unless
    ``read_temp_file("upgrade_version")`` returns something other than the
    empty string, in which case that value is the starting version. After
    each response the version moves on to its ``nextver`` (unchanged when
    absent). Files are only processed when the response carries a truthy
    ``count``. Each file is handed to ``exec_func_times_if_error`` with
    ``times=10``; a falsy result is logged and the file is skipped.
    The loop pauses 10 seconds between polls.

    :return: never returns
    """
    # Downloading the full initial package first is currently disabled:
    # self.get_initial_resp()
    version = G_CONFIG.curver
    saved_version = read_temp_file("upgrade_version")
    if saved_version != "":
        version = saved_version
    while True:
        resp = self.get_upgrade_resp({"curver": version, "limit": 100})
        has_files = resp.get("count", 0)
        version = resp.get("nextver", version)
        pending = resp.get("files", []) if has_files else []
        for entry in pending:
            downloaded = exec_func_times_if_error(
                self.download, url=entry["link"], file_name=entry["name"], times=10)
            if not downloaded:
                logger.error("尝试10次未下载成功")
        time.sleep(10)
def deal_with_files(cls, database, files):
    """
    Load the given task files and store their records in the chosen database.

    The lines returned by ``cls.load_file`` are turned into ``datas`` / ``ips``
    by ``cls.init_data``; the matching ``change_risk_tag_already_in_*`` hook
    is applied and the values of ``datas`` are written through
    ``batch_update_into_mongo`` or ``batch_update_into_mysql``.
    Errors are logged, never raised. Every entry of ``files`` is passed to
    ``remove_file`` at the end, whether or not processing succeeded.

    :param database: "mongodb" or "mysql"; any other value is logged and
        nothing is stored
    :param files: iterable of task file names/paths
    :return: None
    """
    try:
        start_time = time.time()
        logger.info("consume run")
        lines = cls.load_file(files)
        datas, ips = cls.init_data(lines)
        if database == "mongodb":
            logger.debug("mongo")
            cls.change_risk_tag_already_in_mongo(datas, ips)
            logger.debug("change_risk_tag_already_in_mongo done")
            values = datas.values()
            logger.debug(values)
            cls.batch_update_into_mongo(values, files)
        elif database == "mysql":
            cls.change_risk_tag_already_in_mysql(datas, ips)
            values = datas.values()
            cls.batch_update_into_mysql(values, files)
        else:
            # Previously a silent no-op: make the misconfiguration visible,
            # because the files are still removed below.
            logger.warning("unsupported database: {0}".format(database))
            return
        logger.info("入库 耗时:{0}, 共插入数据{1}".format(
            time.time() - start_time, len(values)))
    except Exception as e:
        logger.error(e)
        # "\n" replaces the original "/n" typo so the traceback starts on
        # its own line.
        logger.error("报错文件:{0}\n, {1}".format(files, traceback.format_exc()))
    finally:
        for file in files:
            remove_file(file)
def execute_update_sql_with_commit(self, param_list):
    """
    Upsert rows into ``self.table`` one statement per row, then commit once.

    For an existing key the statement refreshes every column listed in its
    ``ON DUPLICATE KEY UPDATE`` clause (all inserted columns except ``ip``
    and ``areacode``).

    :param param_list: iterable of mappings keyed by column name (``ip``,
        ``type``, ``risk_tag``, ...), matching the named placeholders
    :return: 1 on success, 0 when ``param_list`` is empty or on any error
    """
    if not param_list:
        return 0
    conn = None
    try:
        # The statement is identical for every row, so build it once.
        insert_sql = (
            "INSERT INTO `{table}` "
            "(`ip`, `type`, `risk_tag`, `risk_score`, `risk_level`,`country`, `province`, `city`, `district`, "
            "`owner`,`latitude`,`longitude`,`adcode`,`areacode`,`continent`) "
            "VALUES(%(ip)s, %(type)s, %(risk_tag)s, %(risk_score)s, %(risk_level)s,%(country)s, %(province)s, "
            "%(city)s,%(district)s, %(owner)s, %(latitude)s,%(longitude)s,%(adcode)s,"
            "%(areacode)s,%(continent)s) ON DUPLICATE KEY UPDATE risk_score=VALUES(risk_score),"
            " risk_tag=VALUES(risk_tag),risk_level=VALUES(risk_level), country=VALUES(country), "
            "province=VALUES(province),city=VALUES(city),district=VALUES(district),owner=VALUES(owner),"
            "latitude=VALUES(latitude),longitude=VALUES(longitude), adcode=VALUES(adcode),"
            "continent=VALUES(continent), `type`=VALUES(`type`);"
        ).format(table=self.table)
        conn = self.pool.connection()
        with conn.cursor() as cursor:
            for data in param_list:
                cursor.execute(insert_sql, data)
        conn.commit()
        return 1
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return 0
    finally:
        # Hand the connection back to the pool even when a statement failed;
        # previously it leaked on any error.
        if conn is not None:
            try:
                conn.close()
            except Exception as close_err:
                logger.error(close_err)
def deal_with_files(cls, database, files):
    """
    Load the given task files and store their records in the chosen database.

    The lines returned by ``cls.load_file`` are turned into records by
    ``cls.init_data`` and written through ``batch_update_into_mongo`` or
    ``batch_update_into_mysql``. Afterwards the last entry of ``files`` is
    written to the ``upgrade_version`` temp file.
    Errors are logged, never raised. Every entry of ``files`` is passed to
    ``remove_file`` at the end, whether or not processing succeeded.

    :param database: "mongodb" or "mysql"; any other value is logged and
        nothing is stored
    :param files: sequence of task file names/paths
    :return: None
    """
    try:
        start_time = time.time()
        logger.info("consume run")
        lines = cls.load_file(files)
        datas, _ = cls.init_data(lines)
        if database == "mongodb":
            logger.debug("mongo")
            values = datas.values()
            logger.debug(values)
            cls.batch_update_into_mongo(values)
        elif database == "mysql":
            values = datas.values()
            cls.batch_update_into_mysql(values)
        else:
            # Previously a silent no-op: make the misconfiguration visible,
            # because the files are still removed below.
            logger.warning("unsupported database: {0}".format(database))
            return
        write_temp_file("upgrade_version", files[-1])
        logger.info("入库 耗时:{0}, 共插入数据{1}".format(time.time() - start_time, len(values)))
    except Exception as e:
        logger.error(e)
        # "\n" replaces the original "/n" typo so the traceback starts on
        # its own line.
        logger.error("报错文件:{0}\n, {1}".format(files, traceback.format_exc()))
    finally:
        for file in files:
            remove_file(file)
def batch_update_into_mysql(cls, values, files):
    """
    Write ``values`` to MySQL and report the outcome for ``files``.

    ``IpToMysql.get_instance().execute_many_sql_with_commit`` is run through
    ``exec_func_times_if_error`` with ``times=5``. A truthy result uploads
    status "db" for the files; otherwise an error is logged and status
    "db_fall" is uploaded.

    :param values: records to store
    :param files: files the records came from, passed to ``cls.batch_upload``
    :return: ``values``, unchanged
    """
    stored = exec_func_times_if_error(
        IpToMysql.get_instance().execute_many_sql_with_commit, values, times=5)
    if not stored:
        logger.error("尝试5次未下载成功")
    cls.batch_upload(files, "db" if stored else "db_fall")
    return values
def batch_update_into_mongo(cls, values, files):
    """
    Write ``values`` to MongoDB and report the outcome for ``files``.

    ``IpToMongoDB.get_instance().batch_update`` is run through
    ``exec_func_times_if_error`` with ``times=5``. A truthy result uploads
    status "db" for the files; otherwise an error is logged and status
    "db_fall" is uploaded.

    :param values: records to store
    :param files: files the records came from, passed to ``cls.batch_upload``
    :return: ``values``, unchanged
    """
    stored = exec_func_times_if_error(
        IpToMongoDB.get_instance().batch_update, values, times=5)
    if not stored:
        logger.error("尝试5次未下载成功")
    cls.batch_upload(files, "db" if stored else "db_fall")
    return values
def execute_update_sql_with_commit(self, param_list):
    """
    Insert or update rows in ``self.table`` one by one, then commit once.

    For each row the stored ``risk_tag`` for its ``ip`` is looked up:

    * no stored row: the record is inserted as is;
    * stored row whose tag does not contain the first two characters of the
      new tag: the new tag is appended after a "|" (for type "数据中心" the
      text "机房流量" is first removed from the stored tag);
    * otherwise: the 19 characters following the matching tag name in the
      stored tag are replaced by characters 3..21 of the new tag
      (presumably a "YYYY-mm-dd HH:MM:SS" timestamp -- TODO confirm the tag
      format against the data producer).

    In the last two cases ``risk_tag``, ``risk_score`` and ``risk_level`` of
    the stored row are updated. NOTE: ``data["risk_tag"]`` of the passed-in
    mappings is overwritten with the merged tag.

    :param param_list: iterable of mutable mappings keyed by column name
    :return: 1 on success, 0 when ``param_list`` is empty or on any error
    """
    if not param_list:
        return 0
    conn = None
    try:
        # The three statements are identical for every row: build them once.
        query_sql = "SELECT `risk_tag` from `{table}` where ip=%(ip)s".format(table=self.table)
        insert_sql = (
            "INSERT INTO `{table}` "
            "(`ip`, `type`, `risk_tag`, `risk_score`, `risk_level`,`country`, `province`, `city`, "
            "`district`, `owner`,`latitude`,`longitude`,`adcode`,`areacode`,`continent`) "
            "VALUES(%(ip)s, %(type)s, %(risk_tag)s, %(risk_score)s, %(risk_level)s,%(country)s, "
            "%(province)s, %(city)s,%(district)s, %(owner)s, %(latitude)s,%(longitude)s,%(adcode)s,"
            "%(areacode)s,%(continent)s)"
        ).format(table=self.table)
        update_sql = (
            "UPDATE `{table}` set `risk_tag` =%(risk_tag)s,`risk_score` =%(risk_score)s,"
            "`risk_level`=%(risk_level)s where ip=%(ip)s"
        ).format(table=self.table)
        conn = self.pool.connection()
        with conn.cursor() as cursor:
            for data in param_list:
                if cursor.execute(query_sql, data) == 0:
                    cursor.execute(insert_sql, data)
                    continue
                # Work on a local copy of the stored tag: the fetched row may
                # be a tuple, and assigning into it raised TypeError.
                existing_tag = cursor.fetchone()[0]
                new_tag = data["risk_tag"][:2]
                index = existing_tag.find(new_tag)
                if index == -1:
                    # Tag not present yet: append it.
                    if data["type"] == "数据中心":
                        existing_tag = existing_tag.replace("机房流量", "")
                    data["risk_tag"] = existing_tag + "|" + data["risk_tag"]
                else:
                    # Tag already present: refresh the text that follows it.
                    data["risk_tag"] = existing_tag.replace(
                        existing_tag[index + 3:index + 22], data["risk_tag"][3:22])
                cursor.execute(update_sql, data)
        conn.commit()
        return 1
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return 0
    finally:
        # Hand the connection back to the pool even when a statement failed;
        # previously it leaked on any error.
        if conn is not None:
            try:
                conn.close()
            except Exception as close_err:
                logger.error(close_err)
def download_initial(cls, url, file_name):
    """
    Download a file from OSS with ``wget`` and save it under ``./download/``.

    :param url: OSS download link
    :param file_name: name the file is saved as inside ``./download/``
    :return: True when ``wget`` exited with status 0; False when it failed
        or could not be started
    """
    try:
        file_path = "./download/" + file_name
        # Argument list instead of a shell string: the URL comes from a
        # server response and must not be interpreted by a shell.
        exit_code = subprocess.call(["wget", url, "-O", file_path])
        if exit_code != 0:
            # subprocess.call does not raise on failure, so the exit status
            # has to be checked explicitly; it used to be ignored.
            logger.error("wget exited with code {0} for {1}".format(exit_code, file_name))
            return False
        return True
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return False
def write_data(cls, database, filename):
    """
    Parse ``./task/<filename>`` line by line and store it in batches.

    Each line goes through ``parse_data``; the results are handed to
    ``cls.insert_to_db`` in batches of 100000, followed by one final call
    with whatever remains. Errors are logged, never raised. The task file is
    removed afterwards whether or not processing succeeded.

    :param database: database selector forwarded to ``cls.insert_to_db``
    :param filename: name of the file inside ``./task/``
    :return: None
    """
    task_path = "./task/{}".format(filename)
    try:
        with open(task_path, "r") as f:
            batch = []
            for line in f:
                batch.append(parse_data(line))
                if len(batch) >= 100000:
                    cls.insert_to_db(database, batch)
                    batch = []
            cls.insert_to_db(database, batch)
    except Exception as e:
        logger.error(e)
        # "\n" replaces the original "/n" typo so the traceback starts on
        # its own line.
        logger.error("报错文件:{0}\n, {1}".format(filename, traceback.format_exc()))
    finally:
        # Argument list instead of a shell string, so unusual characters in
        # the file name are never interpreted by a shell.
        subprocess.call(["rm", "-rf", task_path])
def get_initial_resp(self):
    """
    Request the initial (full) package and download it.

    Posts the configured ``snuser`` to the ``/api/v6/initial/`` endpoint of
    ``G_CONFIG.host``. On status 200 the ``data`` field is passed through
    ``aes_decrypt_seg`` and JSON-decoded, and the file it describes is
    fetched with ``self.download_initial``.

    :return: name of the downloaded file; "" on a non-200 status, a failed
        download, or any error (which is logged)
    """
    url = "https://{host}/api/v6/initial/".format(host=G_CONFIG.host)
    payload = {"snuser": G_CONFIG.user["snuser"]}
    logger.info(payload)
    try:
        # (connect, read) timeout: without it a stalled server would block
        # the caller indefinitely.
        r = requests.post(url, data=json.dumps(payload), timeout=(10, 60))
        rjson = json.loads(r.text)
        if rjson["status"] != 200:
            # ``warning`` replaces the deprecated ``warn`` alias.
            logger.warning("error code is {}".format(rjson["status"]))
            return ""
        package = json.loads(aes_decrypt_seg(rjson["data"]))["file"]
        if self.download_initial(url=package["link"], file_name=package["name"]):
            return package["name"]
        # Used to fall through and return None; every other failure path
        # returns "", so do the same here.
        logger.error("initial package download failed: {0}".format(package["name"]))
        return ""
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return ""
def batch_update(self, data_list):
    """
    Bulk upsert: replace the document with the same ``ip``, insert otherwise.

    One ``ReplaceOne(..., upsert=True)`` keyed on ``ip`` is built per record
    and all of them are sent in a single unordered ``bulk_write``.

    :param data_list: iterable of dicts, each containing an "ip" key
    :return: 1 on success, 0 when ``data_list`` is empty or on any error
    """
    if not data_list:
        return 0
    try:
        operations = [
            ReplaceOne({"ip": doc["ip"]}, replacement=doc, upsert=True)
            for doc in data_list
        ]
        self.get_conn().bulk_write(operations, ordered=False)
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return 0
    return 1
def execute_sql_find_wigh_ips(self, param_list):
    """
    Fetch the stored rows for the given IPs.

    Runs ``SELECT * ... where ip in (...)`` with one placeholder per IP and
    maps the first 15 columns of each row, by position, to the keys
    ``ip``, ``type``, ``risk_tag``, ``risk_score``, ``risk_level``,
    ``country``, ``province``, ``city``, ``district``, ``owner``,
    ``latitude``, ``longitude``, ``adcode``, ``areacode``, ``continent``.

    :param param_list: sized collection of IP strings
    :return: list of row dicts; empty when ``param_list`` is empty, and
        whatever was collected so far when an error occurs (it is logged)
    """
    results = []
    if not param_list:
        return results
    fields = (
        "ip", "type", "risk_tag", "risk_score", "risk_level", "country",
        "province", "city", "district", "owner", "latitude", "longitude",
        "adcode", "areacode", "continent",
    )
    conn = None
    try:
        conn = self.pool.connection()
        with conn.cursor() as cursor:
            query_sql = 'SELECT * from `{table}` where ip in (%s)' % ",".join(
                ["%s"] * len(param_list))
            query_sql = query_sql.format(table=self.table)
            logger.info("query_sql")
            logger.info(query_sql)
            # Pass a real list so set / dict-view inputs are accepted too.
            cursor.execute(query_sql, list(param_list))
            for row in cursor.fetchall():
                results.append(dict(zip(fields, row)))
        return results
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return results
    finally:
        # The connection was never closed before, so every call kept one
        # pooled connection checked out.
        if conn is not None:
            try:
                conn.close()
            except Exception as close_err:
                logger.error(close_err)
def download(cls, url, file_name):
    """
    Download a gzip file from OSS, unpack it and move it into ``./task/``.

    The response body is saved as ``./download/<file_name>`` and unpacked
    with ``gunzip``; the file named after the part of ``file_name`` before
    its first "." is then moved to ``./task/``.

    :param url: OSS download link
    :param file_name: name the download is saved as
    :return: True when every step succeeded; False on an HTTP error, a
        non-zero ``gunzip``/``mv`` exit status or any exception (logged)
    """
    try:
        # (connect, read) timeout: without it a stalled server would block
        # the caller indefinitely.
        r = requests.get(url, timeout=(10, 300))
        # Do not save an HTTP error page as if it were the package.
        r.raise_for_status()
        file_path = "./download/" + file_name
        with open(file_path, "wb") as code:
            code.write(r.content)

        # Argument lists instead of shell strings: the file name comes from
        # a server response and must not be interpreted by a shell.
        # "-f" lets a repeated attempt overwrite output left by an earlier one.
        gunzip_cmd = ["gunzip", "-f", file_path]
        logger.info(" ".join(gunzip_cmd))
        # subprocess.call does not raise on failure; the exit status used to
        # be ignored, so a failed unpack was reported as success.
        if subprocess.call(gunzip_cmd) != 0:
            logger.error("gunzip failed for {0}".format(file_path))
            return False

        name_upzip = file_name.split(".")[0]
        mv_cmd = ["mv", "./download/{file}".format(file=name_upzip), "./task/"]
        logger.info(" ".join(mv_cmd))
        if subprocess.call(mv_cmd) != 0:
            logger.error("mv failed for {0}".format(name_upzip))
            return False
        return True
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        return False
def batch_update_into_mongo(cls, values):
    """
    Write ``values`` to MongoDB via ``IpToMongoDB.get_instance().batch_update``.

    :param values: records to store
    :return: ``values``, unchanged; a failed write is only logged
    """
    # A falsy result means the write failed. The check used to be inverted
    # and logged an error on every successful write.
    if not IpToMongoDB.get_instance().batch_update(values):
        logger.error("尝试5次未下载成功")
    return values
def batch_update_into_mysql(cls, values):
    """
    Write ``values`` to MySQL via
    ``IpToMysql.get_instance().execute_many_sql_with_commit``.

    :param values: records to store
    :return: ``values``, unchanged; a failed write is only logged
    """
    # A falsy result means the write failed. The check used to be inverted
    # and logged an error on every successful write.
    if not IpToMysql.get_instance().execute_many_sql_with_commit(values):
        logger.error("尝试5次未下载成功")
    return values