def __generate_model(self, data, task_id):
    """
    Train a model and record the outcome of the training task.

    Runs ``XGBoosting.xgb_train`` on ``data`` and then stores the final
    state of the task through ``TrainOperation.update_model_info``:
    status "complete" with model name ``<task_id>_model`` when the trainer
    returns code 0, status "failed" with an empty model name otherwise
    (including when the trainer raises).

    :param data: Training dataset.
    :param task_id: The id of the training task.
    """
    xgb_obj = xgboosting.XGBoosting()
    succeeded = False
    try:
        # Only the return code is used; the trainer's second value is ignored.
        ret_code, _ = xgb_obj.xgb_train(data, task_id)
        succeeded = ret_code == 0
    finally:
        # Executed even when training raises, so a task that process_train
        # inserted as "running" is always moved to a final state. The
        # exception itself still propagates once the update has been written.
        params = {
            "task_id": task_id,
            "end_time": int(time.time()),
            "status": "complete" if succeeded else "failed",
            "model_name": (task_id + "_model") if succeeded else "",
        }
        train_op.TrainOperation().update_model_info(params)
def process_train(self, data):
    """
    Start a background thread that trains a model on the selected samples.

    Queries the samples matching ``data`` through ``self.sample_op_obj``,
    counts the samples whose flag equals 1 (positive) and all others
    (negative), registers a task with status "running" and launches
    ``__generate_model`` in a non-daemon thread.

    :param data: Mapping of sample filters; the keys "trainOrTest",
                 "positiveOrNegative", "source", "beginTime" and "endTime"
                 are read.
    :return: ``build_ret_data(LACK_SAMPLE, "")`` when there is no positive
             or no negative sample, ``build_ret_data(OP_SUCCESS, "")``
             otherwise.
    """
    sample_params = {
        "trainOrTest": data["trainOrTest"],
        "positiveOrNegative": data["positiveOrNegative"],
        "source": data["source"],
        "beginTime": data["beginTime"],
        "endTime": data["endTime"],
    }
    samples = self.sample_op_obj.sample_query_all(sample_params)
    train_op_obj = train_op.TrainOperation()

    samples_list = []
    positive_count = 0
    for sample in samples:
        samples_list.append({
            "flag": sample["flag"],
            # Build a real list: on Python 3 map() would hand the trainer a
            # one-shot iterator instead of the parsed values.
            "data": [int(value) for value in sample["data"].split(',')],
        })
        if sample["flag"] == 1:
            positive_count += 1
    # Every sample that is not flagged 1 counts as negative.
    negative_count = len(samples_list) - positive_count

    # Training needs both classes; bail out before anything is recorded.
    if positive_count == 0 or negative_count == 0:
        return build_ret_data(LACK_SAMPLE, "")

    # One clock reading keeps the task id and the stored times consistent.
    now = time.time()
    task_id = str(int(round(now * 1000)))
    train_params = {
        "begin_time": int(now),
        "end_time": int(now),
        "task_id": task_id,
        "status": "running",
        "source": data["source"],
        "sample_num": len(samples_list),
        # The key spelling is part of the stored record; keep it unchanged.
        "postive_sample_num": positive_count,
        "negative_sample_num": negative_count,
    }
    train_op_obj.insert_train_info(train_params)

    try:
        worker = threading.Thread(
            target=self.__generate_model,
            args=(samples_list, task_id),
        )
        # Attribute form of the deprecated setDaemon(False).
        worker.daemon = False
        worker.start()
    except Exception:
        # The thread could not be started: mark the task as failed.
        # NOTE(review): OP_SUCCESS is still returned below in this case --
        # confirm whether callers should be told about the failure.
        train_op_obj.update_model_info({
            "task_id": task_id,
            "end_time": int(time.time()),
            "status": "failed",
            "model_name": "",
        })
    return build_ret_data(OP_SUCCESS, "")
def __generate_model(self, data, task_id):
    """
    Train a model and record the outcome of the training task.

    :param data: Training dataset: the list assembled by ``process_train``,
                 one dict per sample holding the sample's "flag" and its
                 "data" values converted to integers.
    :param task_id: The id of the training task (``process_train`` derives
                    it from the current time in milliseconds).
    """
    xgb_obj = xgboosting.XGBoosting()
    succeeded = False
    try:
        # Run the xgboost-based trainer on the whole sample set.
        # The first returned value is the result code (0 is treated as
        # success, anything else as an error); the second value is unused.
        ret_code, _ = xgb_obj.xgb_train(data, task_id)
        succeeded = ret_code == 0
    finally:
        # Executed even when training raises, so a task that process_train
        # inserted as "running" is always moved to a final state. The
        # exception itself still propagates once the update has been written.
        params = {
            "task_id": task_id,
            "end_time": int(time.time()),
            "status": "complete" if succeeded else "failed",
            "model_name": (task_id + "_model") if succeeded else "",
        }
        # Store the final task information. According to the original
        # annotations this opens a database connection and updates the
        # train_task table -- not verifiable from this block.
        train_op.TrainOperation().update_model_info(params)
def process_train(self, data):
    """
    Start a background thread that trains a model on the selected samples.

    Queries the samples matching ``data`` through ``self.sample_op_obj``,
    counts the samples whose flag equals 1 (positive) and all others
    (negative), registers a task with status "running" and launches
    ``__generate_model`` in a non-daemon thread.

    :param data: Mapping of sample filters; the keys "trainOrTest",
                 "positiveOrNegative", "source", "beginTime" and "endTime"
                 are read.
    :return: ``build_ret_data(LACK_SAMPLE, "")`` when there is no positive
             or no negative sample, ``build_ret_data(OP_SUCCESS, "")``
             otherwise.
    """
    sample_params = {
        "trainOrTest": data["trainOrTest"],
        "positiveOrNegative": data["positiveOrNegative"],
        "source": data["source"],
        "beginTime": data["beginTime"],
        "endTime": data["endTime"],
    }
    # Fetch the samples that match the selection made by the caller.
    # Per the original annotations the query reads the sample_dataset table
    # of the metis database -- not verifiable from this block.
    samples = self.sample_op_obj.sample_query_all(sample_params)
    # Each returned item is used as a mapping with a "flag" entry and a
    # "data" entry holding comma-separated integers, e.g. "660,719,649".
    # NOTE(review): the original annotations show "flag" as a string, while
    # the check below compares against the integer 1 -- confirm the type.

    # Handle used to insert and update the training task record.
    train_op_obj = train_op.TrainOperation()

    samples_list = []
    positive_count = 0
    for sample in samples:
        samples_list.append({
            "flag": sample["flag"],
            # Convert the comma-separated text into a list of ints. A real
            # list is built because on Python 3 map() would hand the trainer
            # a one-shot iterator instead of the parsed values.
            "data": [int(value) for value in sample["data"].split(',')],
        })
        # Count the positive samples; every other sample is negative.
        if sample["flag"] == 1:
            positive_count += 1
    negative_count = len(samples_list) - positive_count

    # Training needs both classes; bail out before anything is recorded.
    if positive_count == 0 or negative_count == 0:
        return build_ret_data(LACK_SAMPLE, "")

    # The task id is the current time in milliseconds, rounded to an integer.
    # One clock reading keeps the task id and the stored times consistent.
    now = time.time()
    task_id = str(int(round(now * 1000)))
    train_params = {
        "begin_time": int(now),
        "end_time": int(now),
        "task_id": task_id,
        "status": "running",
        "source": data["source"],
        "sample_num": len(samples_list),
        # The key spelling is part of the stored record; keep it unchanged.
        "postive_sample_num": positive_count,
        "negative_sample_num": negative_count,
    }
    # Register the task with status "running" before training starts.
    train_op_obj.insert_train_info(train_params)

    try:
        # Data preparation is done: train in a separate thread, handing over
        # the complete prepared sample list and the task id in one go.
        worker = threading.Thread(
            target=self.__generate_model,
            args=(samples_list, task_id),
        )
        # Attribute form of the deprecated setDaemon(False).
        worker.daemon = False
        worker.start()
    except Exception:
        # The thread could not be started: mark the task as failed.
        # NOTE(review): OP_SUCCESS is still returned below in this case --
        # confirm whether callers should be told about the failure.
        train_op_obj.update_model_info({
            "task_id": task_id,
            "end_time": int(time.time()),
            "status": "failed",
            "model_name": "",
        })
    return build_ret_data(OP_SUCCESS, "")