def run(self, app=None, data_key=None, data_path=None, config_info=None):
    """Fit the custom estimator stage on the upstream data and save the fitted pipeline.

    :param app: Forwarded unchanged to ``PublicMethods().get_data`` and
        ``PublicMethods.wrapper_cols_keys_cache_data``.
    :param data_key: Forwarded to ``PublicMethods().get_data`` to locate the input data.
    :param data_path: Forwarded to ``PublicMethods().get_data`` to locate the input data.
        How ``data_key`` and ``data_path`` are prioritised is decided inside
        ``get_data`` and is not visible here.
    :param config_info: Component configuration, either an object exposing ``.get``
        or UTF-8 encoded JSON ``bytes`` (decoded here). Keys read: ``model_path``,
        ``inputCols``, ``outputCol``, ``custom_1``, ``custom_2``.
    :return: The value returned by ``PublicMethods.wrapper_cols_keys_cache_data``
        with ``["trained_model", 1, model_path]`` appended.
        NOTE(review): the original docstring says the result ends up in redis;
        nothing in this method shows that -- confirm against the helper.
    """
    if isinstance(config_info, bytes):
        config_info = json.loads(config_info.decode("utf-8"))

    # PublicMethods is presumably css.ai.algo.utils.public_methods.PublicMethods
    # (the original carried that import as a commented-out line) -- TODO confirm.

    # Template part (do not modify): fetch the output of the previous component.
    data = PublicMethods().get_data(app=app, data_path=data_path, data_key=data_key)
    # Destination where the trained model is saved.
    model_path = config_info.get("model_path")

    # Custom parameters -- component authors adapt this part.
    inputCols = config_info.get("inputCols")
    outputCol = config_info.get("outputCol")
    custom_1 = config_info.get("custom_1")
    custom_2 = config_info.get("custom_2")

    # Parameters for the custom Estimator -- component authors adapt this part.
    config_custom = {"inputCols": inputCols, "outputCol": outputCol}
    # Every incoming value is a str, so eval() is used to recover the real value.
    # SECURITY NOTE(review): eval() executes arbitrary expressions taken from
    # config_info; if that configuration can come from an untrusted source this
    # is code execution. Consider ast.literal_eval once the accepted value
    # grammar is confirmed.
    # eval() runs in this function's scope, which is why the local names above
    # are deliberately left as they were.
    config_custom["custom_1"] = eval(custom_1)
    config_custom["custom_2"] = eval(custom_2)
    stage_spec = [("CustomEstimator", config_custom)]

    # Template part (do not modify): fit/transform, persist, build the result.
    transformed, fitted, fitted_stages = PublicMethods().pipeline_fit_transform(
        stage_spec, data
    )
    PipelineModel(fitted_stages).write().overwrite().save(model_path)

    result = PublicMethods.wrapper_cols_keys_cache_data(
        app,
        transformed,
        stage_key=fitted.uid,
        stage_list=fitted_stages,
        class_name=self.name,
        postfixes=None,
    )
    # Tell the caller where the trained model was written.
    result.append(["trained_model", 1, model_path])
    return result
def save_pipeline(pipeline: PipelineModel, filepath: str) -> None:
    """Serialize the fitted pipeline to ``filepath``.

    The pipeline's writer is switched to overwrite mode before saving, so the
    call is intended to replace whatever was previously stored at that path.

    :param pipeline: Fitted pipeline model to persist.
    :param filepath: Destination passed to the writer as ``path``.
    :return: None
    """
    writer = pipeline.write().overwrite()
    writer.save(path=filepath)