Ejemplo n.º 1
0
    def __generate_model(self, data, task_id):
        """
        Train a model and record the outcome of the training task.

        Runs ``xgboosting.XGBoosting().xgb_train(data, task_id)`` and then
        reports the result through
        ``train_op.TrainOperation().update_model_info``: status "complete"
        with model name ``task_id + "_model"`` when the returned code is 0,
        status "failed" with an empty model name otherwise.

        The status is written from a ``finally`` clause, so a training call
        that raises is still recorded as "failed" instead of leaving the
        task without a final status. The exception is not swallowed; it
        keeps propagating after the status has been written.

        :param data: Training dataset.
        :param task_id: The id of the training task.
        """
        succeeded = False
        try:
            # Only the return code is used; the second element is ignored.
            ret_code, _ = xgboosting.XGBoosting().xgb_train(data, task_id)
            succeeded = ret_code == 0
        finally:
            # Reached on normal completion and when training raised.
            current_timestamp = int(time.time())
            params = {
                "task_id": task_id,
                "end_time": current_timestamp,
                "status": "complete" if succeeded else "failed",
                "model_name": (task_id + "_model") if succeeded else ""
            }
            train_op.TrainOperation().update_model_info(params)
Ejemplo n.º 2
0
 def process_train(self, data):
     """
     Query the selected samples and start a background training task.

     The samples returned by ``self.sample_op_obj.sample_query_all`` are
     converted (comma-separated "data" string -> list of ints) and counted
     by flag (``flag == 1`` is positive, anything else negative). When both
     classes are present, a task with status "running" is inserted through
     ``train_op.TrainOperation().insert_train_info`` and the training is
     started in a non-daemon thread running ``self.__generate_model``.

     The try/except below only covers creating and starting the thread;
     errors raised later inside the training thread are not seen here.

     :param data: Dict with the keys "trainOrTest", "positiveOrNegative",
                  "source", "beginTime" and "endTime" selecting the samples.
     :return: ``build_ret_data(LACK_SAMPLE, "")`` when there is no positive
              or no negative sample, otherwise
              ``build_ret_data(OP_SUCCESS, "")``.
     """
     sample_params = {
         "trainOrTest": data["trainOrTest"],
         "positiveOrNegative": data["positiveOrNegative"],
         "source": data["source"],
         "beginTime": data["beginTime"],
         "endTime": data["endTime"]
     }
     samples = self.sample_op_obj.sample_query_all(sample_params)
     train_op_obj = train_op.TrainOperation()
     samples_list = []
     positive_count = 0
     negative_count = 0
     for sample in samples:
         samples_list.append({
             "flag": sample["flag"],
             # A real list, not map(): on Python 3 map() returns a one-shot
             # iterator, which is not the list the training code is handed
             # on Python 2.
             "data": [int(value) for value in sample["data"].split(',')]
         })
         if sample["flag"] == 1:
             positive_count += 1
         else:
             negative_count += 1
     # Training needs both classes; bail out before creating any task.
     if positive_count == 0 or negative_count == 0:
         return build_ret_data(LACK_SAMPLE, "")
     # One clock reading for the id and both timestamps keeps them consistent.
     now = time.time()
     # The task id is the current time in milliseconds, as a string.
     task_id = str(int(round(now * 1000)))
     train_params = {
         "begin_time": int(now),
         "end_time": int(now),
         "task_id": task_id,
         "status": "running",
         "source": data["source"],
         "sample_num": len(samples_list),
         # NOTE: "postive" (sic) is the key that is actually sent; it is
         # runtime data, so the spelling must stay as is.
         "postive_sample_num": positive_count,
         "negative_sample_num": negative_count
     }
     train_op_obj.insert_train_info(train_params)
     try:
         worker = threading.Thread(target=self.__generate_model,
                                   args=(samples_list, task_id))
         # Attribute form; Thread.setDaemon() is deprecated.
         worker.daemon = False
         worker.start()
     except Exception:
         # The thread could not be created or started: mark the task failed.
         params = {
             "task_id": task_id,
             "end_time": int(time.time()),
             "status": "failed",
             "model_name": ""
         }
         train_op_obj.update_model_info(params)
     # NOTE(review): OP_SUCCESS is returned even when starting the thread
     # failed above - kept as is for callers; confirm this is intended.
     return build_ret_data(OP_SUCCESS, "")
    def __generate_model(self, data, task_id):
        """
        Train a model and record the outcome of the training task.

        Runs ``xgboosting.XGBoosting().xgb_train(data, task_id)`` and then
        reports the result through
        ``train_op.TrainOperation().update_model_info``: status "complete"
        with model name ``task_id + "_model"`` when the returned code is 0,
        status "failed" with an empty model name otherwise.

        The status is written from a ``finally`` clause, so a training call
        that raises is still recorded as "failed" instead of leaving the
        task without a final status. The exception is not swallowed; it
        keeps propagating after the status has been written.

        :param data: Training dataset. Presumably the ``samples_list`` built
                by ``process_train``: a list of dicts, each with a "flag"
                entry and a "data" entry holding the sample values, e.g.
                    [{"flag": x, "data": [346, 353, 321, ...]},
                     {"flag": y, "data": [346, 353, 321, ...]},
                     ...]
                (TODO confirm against ``xgb_train``).
        :param task_id: The id of the training task.
        """
        succeeded = False
        try:
            # Run the XGBoost training.
            #   Arguments:
            #       data    - the sample dataset
            #       task_id - the task id (a timestamp, per the original note)
            #   Return value:
            #       ret_code - 0 on success, otherwise an error code
            #   The second returned element is not used here.
            ret_code, _ = xgboosting.XGBoosting().xgb_train(data, task_id)
            succeeded = ret_code == 0
        finally:
            # Reached on normal completion and when training raised.
            current_timestamp = int(time.time())
            params = {
                "task_id": task_id,
                "end_time": current_timestamp,
                "status": "complete" if succeeded else "failed",
                "model_name": (task_id + "_model") if succeeded else ""
            }
            # Per the original author's notes, TrainOperation() sets up the
            # database connection and update_model_info() updates the task
            # row in table train_task (not visible from here).
            train_op.TrainOperation().update_model_info(params)
 def process_train(self, data):
     """
     Query the selected samples and start a background training task.

     The samples returned by ``self.sample_op_obj.sample_query_all`` are
     converted (comma-separated "data" string -> list of ints) and counted
     by flag (``flag == 1`` is positive, anything else negative). When both
     classes are present, a task with status "running" is inserted through
     ``train_op.TrainOperation().insert_train_info`` and the training is
     started in a non-daemon thread running ``self.__generate_model``.

     The try/except below only covers creating and starting the thread;
     errors raised later inside the training thread are not seen here.

     :param data: Dict with the keys "trainOrTest", "positiveOrNegative",
                  "source", "beginTime" and "endTime" selecting the samples.
     :return: ``build_ret_data(LACK_SAMPLE, "")`` when there is no positive
              or no negative sample, otherwise
              ``build_ret_data(OP_SUCCESS, "")``.
     """
     sample_params = {
         "trainOrTest": data["trainOrTest"],
         "positiveOrNegative": data["positiveOrNegative"],
         "source": data["source"],
         "beginTime": data["beginTime"],
         "endTime": data["endTime"]
     }
     # Fetch the samples matching the selection made on the page.
     # Per the original author's notes, sample_query_all() is
     # dao.time_series_detector.sample_op.SampleOperation.sample_query_all()
     # and reads from table sample_dataset of the metis database.
     samples = self.sample_op_obj.sample_query_all(sample_params)
     # Each returned row is read below as a mapping with a "flag" entry and
     # a "data" entry holding comma-separated integers, e.g.
     #       {"flag": 1, "data": "660,719,649,..."}
     # Create the training DAO (per the original note, app.dao
     # time_series_detector TrainOperation; this sets up the database
     # connection).
     train_op_obj = train_op.TrainOperation()
     samples_list = []
     positive_count = 0
     negative_count = 0
     # `samples` is the list of sample rows returned by the query.
     for sample in samples:
         samples_list.append({
             "flag": sample["flag"],
             # Convert the comma-separated string into a list of ints.
             # A real list, not map(): on Python 3 map() returns a one-shot
             # iterator, which is not the list the training code is handed
             # on Python 2.
             "data": [int(value) for value in sample["data"].split(',')]
         })
         # Count positive and negative samples.
         if sample["flag"] == 1:
             positive_count += 1
         else:
             negative_count += 1
     # Training needs both classes; bail out before creating any task.
     if positive_count == 0 or negative_count == 0:
         return build_ret_data(LACK_SAMPLE, "")
     # One clock reading for the id and both timestamps keeps them consistent.
     now = time.time()
     # The task id is the current timestamp scaled to milliseconds and
     # rounded to the nearest integer, as a string.
     task_id = str(int(round(now * 1000)))
     train_params = {
         "begin_time": int(now),
         "end_time": int(now),
         "task_id": task_id,
         "status": "running",
         "source": data["source"],
         "sample_num": len(samples_list),
         # NOTE: "postive" (sic) is the key that is actually sent; it is
         # runtime data, so the spelling must stay as is.
         "postive_sample_num": positive_count,
         "negative_sample_num": negative_count
     }
     # Register the task with status "running" (per the original note this
     # inserts a row into table train_task of the metis database).
     train_op_obj.insert_train_info(train_params)
     try:
         # Data preparation is done; start the model training.
         # The prepared samples_list and the task_id are handed to the
         # training thread; the whole dataset is passed in one go.
         worker = threading.Thread(target=self.__generate_model,
                                   args=(samples_list, task_id))
         # Attribute form; Thread.setDaemon() is deprecated.
         worker.daemon = False
         worker.start()
     except Exception:
         # Starting the training failed: update the model info accordingly.
         params = {
             "task_id": task_id,
             "end_time": int(time.time()),
             "status": "failed",
             "model_name": ""
         }
         train_op_obj.update_model_info(params)
     # NOTE(review): OP_SUCCESS is returned even when starting the thread
     # failed above - kept as is for callers; confirm this is intended.
     return build_ret_data(OP_SUCCESS, "")