def test_get_pai_tf_cmd(self):
    conf = get_cluster_config({})
    os.environ["SQLFLOW_OSS_CHECKPOINT_DIR"] = '''{"Arn":"arn", "Host":"host"}'''
    cmd = submitter.get_pai_tf_cmd(
        conf, "job.tar.gz", "params.txt", "entry.py", "my_dnn_model",
        "user1/my_dnn_model", "test_project.input_table",
        "test_project.val_table", "test_project.res_table", "test_project",
        "/tmp")
    expected = (
        "pai -name tensorflow1150 -project algo_public_dev -DmaxHungTimeBeforeGCInSeconds=0 "
        "-DjobName=sqlflow_my_dnn_model -Dtags=dnn -Dscript=job.tar.gz -DentryFile=entry.py "
        "-Dtables=odps://test_project/tables/input_table,odps://test_project/tables/val_table "
        "-Doutputs=odps://test_project/tables/res_table -DhyperParameters=\"params.txt\" "
        "-DcheckpointDir='oss://sqlflow-models/user1/my_dnn_model/?role_arn=arn/pai2oss_test_project&host=host' "
        "-DgpuRequired='0'")
    self.assertEqual(expected, cmd)

    conf = get_cluster_config({"train.num_workers": 5})
    cmd = submitter.get_pai_tf_cmd(
        conf, "job.tar.gz", "params.txt", "entry.py", "my_dnn_model",
        "user1/my_dnn_model", "test_project.input_table",
        "test_project.val_table", "test_project.res_table", "test_project",
        "/tmp")
    expected = (
        "pai -name tensorflow1150 -project algo_public_dev -DmaxHungTimeBeforeGCInSeconds=0 "
        "-DjobName=sqlflow_my_dnn_model -Dtags=dnn -Dscript=job.tar.gz -DentryFile=entry.py "
        "-Dtables=odps://test_project/tables/input_table,odps://test_project/tables/val_table "
        "-Doutputs=odps://test_project/tables/res_table -DhyperParameters=\"params.txt\" "
        "-DcheckpointDir='oss://sqlflow-models/user1/my_dnn_model/?role_arn=arn/pai2oss_test_project&host=host' "
        r'''-Dcluster="{\"ps\": {\"count\": 1, \"cpu\": 200, \"gpu\": 0}, \"worker\": {\"count\": 5, \"cpu\": 400, \"gpu\": 0}}"'''
    )
    self.assertEqual(expected, cmd)
    del os.environ["SQLFLOW_OSS_CHECKPOINT_DIR"]
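# The -DcheckpointDir value asserted above is derived from the
# SQLFLOW_OSS_CHECKPOINT_DIR environment variable. Below is a minimal
# sketch of that derivation, assuming the "sqlflow-models" bucket and
# the "pai2oss_" role-arn prefix that appear in the expected string;
# the helper name is hypothetical and not part of the submitter API.
import json  # assumed import; used only by the sketch below

def sketch_checkpoint_dir(oss_model_path, project):
    # parse the {"Arn": ..., "Host": ...} JSON set in the test above
    ck = json.loads(os.environ["SQLFLOW_OSS_CHECKPOINT_DIR"])
    return "oss://sqlflow-models/%s/?role_arn=%s/pai2oss_%s&host=%s" % (
        oss_model_path, ck["Arn"], project, ck["Host"])

# sketch_checkpoint_dir("user1/my_dnn_model", "test_project") returns
# "oss://sqlflow-models/user1/my_dnn_model/?role_arn=arn/pai2oss_test_project&host=host"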
def get_pai_explain_cmd(datasource, project, oss_model_path, model_name,
                        data_table, result_table, model_type, model_params,
                        job_file, params_file, label_column, cwd):
    """Get the command to submit an explain task to PAI

    Args:
        datasource: current datasource
        project: current project
        oss_model_path: the place to load the model from
        model_name: model used to generate the explanation
        data_table: data table from which to load the explain data
        result_table: table to store the explain result
        model_type: type of the model, see also get_oss_saved_model_type
        model_params: parameters specified by the WITH clause
        job_file: tar file including code and libs to execute on PAI
        params_file: extra params file
        label_column: name of the label column
        cwd: current working dir

    Returns:
        The command to submit a PAI explain task
    """
    if model_type == EstimatorType.PAIML:
        cmd = get_explain_random_forests_cmd(datasource, model_name,
                                             data_table, result_table,
                                             label_column)
    else:
        conf = cluster_conf.get_cluster_config(model_params)
        cmd = get_pai_tf_cmd(conf,
                             "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                             "file://" + os.path.join(cwd, PARAMS_FILE),
                             ENTRY_FILE, model_name, oss_model_path,
                             data_table, "", result_table, project)
    return cmd
def submit_pai_explain(datasource, select, result_table, model_name,
                       model_attrs):
    """Pack the needed params and resources into a tarball and submit
    an explain task to PAI

    Args:
        datasource: current datasource
        select: SQL statement to get the explain data set
        result_table: the table name to save the result
        model_name: model used to generate the explanation
        model_attrs: dict, params for training, corresponding to the
            WITH clause
    """
    params = dict(locals())

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    # TODO(typhoonzero): Do **NOT** create tmp table when the select statement
    # is like: "SELECT fields,... FROM table"
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format resultTable name to "db.table" to let the codegen form a
    # submitting argument of format "odps://project/tables/table_name",
    # as shown in the sketch after this function
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    params["oss_model_path"] = oss_model_path

    label_column = model_attrs.get("label_col")
    params["label_column"] = label_column
    create_explain_result_table(datasource, data_table, result_table,
                                model_type, estimator, label_column)

    conf = cluster_conf.get_cluster_config(model_attrs)
    if model_type == EstimatorType.PAIML:
        cmd = get_explain_random_forests_cmd(datasource, model_name,
                                             data_table, result_table,
                                             label_column)
    else:
        if model_type == EstimatorType.XGBOOST:
            params["entry_type"] = "explain_xgb"
        else:
            params["entry_type"] = "explain_tf"
        prepare_archive(cwd, conf, project, estimator, model_name, data_table,
                        "", oss_model_path, params)
        cmd = get_pai_tf_cmd(conf,
                             "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
                             "file://" + path.join(cwd, PARAMS_FILE),
                             ENTRY_FILE, model_name, oss_model_path,
                             data_table, "", result_table, project)
    submit_pai_task(cmd, datasource)
    drop_tables([data_table], datasource)
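# The "db.table" -> ODPS URI convention noted in the comment above is
# what ultimately yields -Dtables/-Doutputs arguments such as
# "odps://test_project/tables/res_table". A minimal illustrative
# sketch; the real formatting happens inside get_pai_tf_cmd, and this
# helper is hypothetical:
def sketch_odps_table_uri(qualified_name):
    project, table = qualified_name.split(".", 1)
    return "odps://%s/tables/%s" % (project, table)

# sketch_odps_table_uri("test_project.res_table")
#   -> "odps://test_project/tables/res_table"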
def submit_pai_evaluate(datasource, model_name, select, result_table,
                        model_attrs, user=""):
    """Submit a PAI evaluation task

    Args:
        datasource: current datasource
        model_name: model used to do the evaluation
        select: SQL statement to get the evaluation data set
        result_table: the table name to save the result
        model_attrs: dict, params for training, corresponding to the
            WITH clause
        user: a string to identify the user, used to load the model
            from the user's directory
    """
    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    project = table_ops.get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = pai_model.get_oss_model_save_path(datasource,
                                                       model_name,
                                                       user=user)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = pai_model.get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    data_table = table_ops.create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    metrics = get_evaluate_metrics(model_type, model_attrs)
    params["metrics"] = metrics
    create_evaluate_result_table(datasource, result_table, metrics)

    conf = cluster_conf.get_cluster_config(model_attrs)

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
    else:
        params["entry_type"] = "evaluate_tf"
    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_tf_cmd(conf,
                         "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                         "file://" + os.path.join(cwd, PARAMS_FILE),
                         ENTRY_FILE, model_name, oss_model_path, data_table,
                         "", result_table, project)
    submit_pai_task(cmd, datasource)
    table_ops.drop_tables([data_table], datasource)
def test_get_cluster_config(self):
    attrs = {
        "train.worker_cpu": 100,
        "train.worker_gpu": 0,
        "train.ps_cpu": 100,
    }
    conf = get_cluster_config(attrs)
    self.assertEqual(100, conf["worker"]["cpu"])
    self.assertEqual(0, conf["worker"]["gpu"])
    self.assertEqual(100, conf["ps"]["cpu"])

    attrs["train.worker_cpu"] = 100.0
    with self.assertRaises(SQLFlowDiagnostic) as ctx:
        get_cluster_config(attrs)
    self.assertEqual("value for cluster config should be int",
                     ctx.exception.args[0])

    attrs["train.worker_cpu"] = 100
    attrs["train.num_evaluator"] = 2
    with self.assertRaises(SQLFlowDiagnostic) as ctx:
        get_cluster_config(attrs)
    self.assertEqual("train.num_evaluator should only be 1 or 0",
                     ctx.exception.args[0])
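# A minimal sketch of the attrs -> conf mapping exercised above.
# Defaults (count=1, ps cpu=200, worker cpu=400, gpu=0) are inferred
# from the -Dcluster payload expected in test_get_pai_tf_cmd; the real
# get_cluster_config lives in cluster_conf and also validates
# train.num_evaluator, which this sketch omits.
def sketch_cluster_config(attrs):
    conf = {
        "ps": {"count": 1, "cpu": 200, "gpu": 0},
        "worker": {"count": 1, "cpu": 400, "gpu": 0},
    }
    key_map = {
        "train.num_ps": ("ps", "count"),
        "train.ps_cpu": ("ps", "cpu"),
        "train.ps_gpu": ("ps", "gpu"),
        "train.num_workers": ("worker", "count"),
        "train.worker_cpu": ("worker", "cpu"),
        "train.worker_gpu": ("worker", "gpu"),
    }
    for key, (role, field) in key_map.items():
        if key in attrs:
            if not isinstance(attrs[key], int):
                raise ValueError("value for cluster config should be int")
            conf[role][field] = attrs[key]
    return conf

# sketch_cluster_config({"train.num_workers": 5}) yields the dict that
# serializes to the -Dcluster JSON asserted in test_get_pai_tf_cmd.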
def submit_pai_predict(datasource, select, result_table, label_column,
                       model_name, model_attrs):
    """Pack the needed params and resources into a tarball and submit
    a prediction task to PAI

    Args:
        datasource: current datasource
        select: SQL statement to get the prediction data set
        result_table: the table name to save the result
        label_column: name of the label column, if it does not exist
            in the select statement
        model_name: model used to do the prediction
        model_attrs: dict, params for training, corresponding to the
            WITH clause
    """
    params = dict(locals())

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    # TODO(typhoonzero): Do **NOT** create tmp table when the select statement
    # is like: "SELECT fields,... FROM table"
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format resultTable name to "db.table" to let the codegen form a
    # submitting argument of format "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    setup_predict_entry(params, model_type)

    # (TODO:lhw) get train label column from model meta
    create_predict_result_table(datasource, data_table, result_table,
                                label_column, None, model_type)

    conf = cluster_conf.get_cluster_config(model_attrs)
    prepare_archive(cwd, conf, project, estimator, model_name, data_table,
                    "", oss_model_path, params)

    cmd = get_pai_predict_cmd(conf, datasource, project, oss_model_path,
                              model_name, data_table, result_table,
                              model_type, cwd)
    submit_pai_task(cmd, datasource)
    drop_tables([data_table], datasource)
def get_pai_predict_cmd(datasource, project, oss_model_path, model_name,
                        predict_table, result_table, model_type,
                        model_params, job_file, params_file, cwd):
    """Get the predict command for a PAI task

    Args:
        datasource: current datasource
        project: current project
        oss_model_path: the place to load the model from
        model_name: model used to do the prediction
        predict_table: the tmp table storing the prediction data set
        result_table: table to store the prediction result
        model_type: type of the model, see also get_oss_saved_model_type
        model_params: parameters specified by the WITH clause
        job_file: tar file including code and libs to execute on PAI
        params_file: extra params file
        cwd: current working dir

    Returns:
        The command to submit a PAI prediction task
    """
    # NOTE(typhoonzero): for PAI machine learning toolkit predicting, we can
    # not load the TrainStmt since the model saving is fully done by PAI.
    # We directly use the columns in the SELECT statement for prediction; an
    # error will be reported by the PAI job if the columns do not match.
    conf = cluster_conf.get_cluster_config(model_params)
    conn = db.connect_with_data_source(datasource)
    if model_type == EstimatorType.PAIML:
        schema = db.get_table_schema(conn, predict_table)
        result_fields = [col[0] for col in schema]
        return ('''pai -name prediction -DmodelName="%s" '''
                '''-DinputTableName="%s" -DoutputTableName="%s" '''
                '''-DfeatureColNames="%s" -DappendColNames="%s"''') % (
                    model_name, predict_table, result_table,
                    ",".join(result_fields), ",".join(result_fields))
    else:
        schema = db.get_table_schema(conn, result_table)
        result_fields = [col[0] for col in schema]
        # For TensorFlow and XGBoost, we build a pai-tf cmd to submit the task
        return get_pai_tf_cmd(conf, job_file, params_file, ENTRY_FILE,
                              model_name, oss_model_path, predict_table, "",
                              result_table, project)
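# For the PAIML branch above, a predict_table with columns
# (sepal_length, class) would produce a command shaped like the
# following; the model and table names are illustrative only:
#
#   pai -name prediction -DmodelName="my_rf_model"
#       -DinputTableName="iris.predict_input"
#       -DoutputTableName="iris.predict_result"
#       -DfeatureColNames="sepal_length,class"
#       -DappendColNames="sepal_length,class"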
def get_pai_train_cmd(datasource, estimator_string, model_name, train_table,
                      val_table, model_params, train_params, path_to_save,
                      job_file, params_file, cwd):
    """Get the train model command for PAI

    Args:
        datasource: current datasource
        estimator_string: estimator name, Keras class name, or XGBoost
        model_name: the model name to train
        train_table: data table from which to load the train data
        val_table: data table from which to load the evaluation data
        model_params: params for training, corresponding to the WITH clause
        train_params: params for the training process
        path_to_save: path to save the model
        job_file: tar file including code and libs to execute on PAI
        params_file: extra params file
        cwd: current working dir

    Returns:
        The command to submit a PAI train task
    """
    project = table_ops.get_project(datasource)
    conf = cluster_conf.get_cluster_config(model_params)
    if estimator_string.lower() == "randomforests":
        cmd = get_train_random_forest_pai_cmd(
            model_name, train_table, model_params,
            train_params["feature_column_names"],
            train_params["label_meta"]["feature_name"])
    elif estimator_string.lower() == "kmeans":
        cmd = get_train_kmeans_pai_cmd(datasource, model_name, train_table,
                                       model_params,
                                       train_params["feature_column_names"])
    else:
        cmd = get_pai_tf_cmd(conf, job_file, params_file, ENTRY_FILE,
                             model_name, path_to_save, train_table, val_table,
                             "", project)
    return cmd
def submit_pai_train(datasource, estimator_string, select, validation_select,
                     model_params, model_name, pre_trained_model,
                     **train_params):
    """Submit a PAI-TF train task to the PAI platform

    Args:
        datasource: string
            Like: odps://access_id:[email protected]/api?
                  curr_project=test_ci&scheme=http
        estimator_string: string
            TensorFlow estimator name, Keras class name, or XGBoost
        select: string
            The SQL statement for selecting the train data
        validation_select: string
            The SQL statement for selecting the validation data
        model_params: dict
            Params for training, corresponding to the WITH clause
        model_name: string
            The model name to train
        pre_trained_model: string
            The pre-trained model name to load
        train_params: dict
            Extra train params, they will be passed to
            runtime.tensorflow.train
    """
    # prepare params for tensorflow train,
    # the params will be pickled into train_params.pkl
    params = dict(locals())
    del params["train_params"]
    params.update(train_params)

    if estimator_string.lower().startswith("xgboost"):
        params["entry_type"] = "train_xgb"
    else:
        params["entry_type"] = "train_tf"

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    conf = cluster_conf.get_cluster_config(model_params)
    train_table, val_table = create_train_and_eval_tmp_table(
        select, validation_select, datasource)
    params["pai_table"], params["pai_val_table"] = train_table, val_table

    # clean the target dir
    path_to_save = get_oss_model_save_path(datasource, model_name)
    path_to_load = get_oss_model_save_path(datasource, pre_trained_model)
    project = get_project(datasource)
    params["oss_model_dir"] = path_to_save

    if path_to_load == "" or path_to_load != path_to_save:
        clean_oss_model_path(path_to_save + "/")

    # zip all required resources into a tarball
    prepare_archive(cwd, conf, project, estimator_string, model_name,
                    train_table, val_table, path_to_save, params)

    # submit the PAI task to execute the training
    if estimator_string.lower() == "randomforests":
        cmd = get_train_random_forest_pai_cmd(
            model_name, train_table, model_params,
            train_params["feature_column_names"],
            train_params["label_meta"]["feature_name"])
    elif estimator_string.lower() == "kmeans":
        cmd = get_train_kmeans_pai_cmd(datasource, model_name, train_table,
                                       model_params,
                                       train_params["feature_column_names"])
    else:
        cmd = get_pai_tf_cmd(conf,
                             "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
                             "file://" + path.join(cwd, PARAMS_FILE),
                             ENTRY_FILE, model_name, path_to_save,
                             train_table, val_table, "", project)
    submit_pai_task(cmd, datasource)

    # save the trained model to sqlfs
    save_model_to_sqlfs(datasource, path_to_save, model_name)
    drop_tables([train_table, val_table], datasource)
def submit_pai_evaluate(datasource, original_sql, select, label_name, model,
                        model_params, result_table, user=""):
    """Submit a PAI evaluation task

    Args:
        datasource: string
            Like: maxcompute://ak:[email protected]/api?
                  curr_project=test_ci&scheme=http
        original_sql: string
            Original "TO EVALUATE" statement.
        select: string
            SQL statement to get the evaluation data set.
        label_name: string
            The label name to evaluate.
        model: string
            Model to load and evaluate.
        model_params: dict
            Params for training, corresponding to the WITH clause.
        result_table: string
            The table name to save the evaluation result.
        user: string
            A string to identify the user, used to load the model from the
            user's directory.
    """
    params = dict(locals())
    project = table_ops.get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)
    params["result_table"] = result_table

    oss_model_path = pai_model.get_oss_model_save_path(datasource,
                                                       model,
                                                       user=user)

    model_type, estimator = pai_model.get_saved_model_type_and_estimator(
        datasource, model)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
        validation_metrics = model_params.get("validation.metrics",
                                              "accuracy_score")
    else:
        params["entry_type"] = "evaluate_tf"
        validation_metrics = model_params.get("validation.metrics",
                                              "Accuracy")
    validation_metrics = [m.strip() for m in validation_metrics.split(",")]

    with db.connect_with_data_source(datasource) as conn:
        result_column_names = create_evaluate_table(conn, result_table,
                                                    validation_metrics)

    with table_ops.create_tmp_tables_guard(select, datasource) as data_table:
        params["pai_table"] = data_table
        params["result_column_names"] = result_column_names

        if try_pai_local_run(params, oss_model_path):
            return

        conf = cluster_conf.get_cluster_config(model_params)
        with temp_file.TemporaryDirectory(prefix="sqlflow", dir="/tmp") as cwd:
            prepare_archive(cwd, estimator, oss_model_path, params)
            cmd = get_pai_tf_cmd(
                conf, "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                "file://" + os.path.join(cwd, PARAMS_FILE), ENTRY_FILE, model,
                oss_model_path, data_table, "", result_table, project)
            submit_pai_task(cmd, datasource)
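# The validation.metrics handling above takes a single comma-separated
# WITH-clause string and normalizes it into a list before creating the
# result table, e.g.:
#
#   model_params = {"validation.metrics": "Accuracy, AUC"}
#   metrics = model_params.get("validation.metrics", "Accuracy")
#   [m.strip() for m in metrics.split(",")]  # -> ["Accuracy", "AUC"]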
def submit_pai_evaluate(datasource, original_sql, select, model_name,
                        model_params, result_table, user=""):
    """Submit a PAI evaluation task

    Args:
        datasource: string
            Like: maxcompute://ak:[email protected]/api?
                  curr_project=test_ci&scheme=http
        original_sql: string
            Original "TO EVALUATE" statement.
        select: string
            SQL statement to get the evaluation data set.
        model_name: string
            Model to load and evaluate.
        model_params: dict
            Params for training, corresponding to the WITH clause.
        result_table: string
            The table name to save the evaluation result.
        user: string
            A string to identify the user, used to load the model from the
            user's directory.
    """
    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    project = table_ops.get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)
    params["result_table"] = result_table

    oss_model_path = pai_model.get_oss_model_save_path(datasource,
                                                       model_name,
                                                       user=user)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = pai_model.get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    data_table = table_ops.create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    metrics = get_evaluate_metrics(model_type, model_params)
    params["metrics"] = metrics
    create_evaluate_result_table(datasource, result_table, metrics)

    conf = cluster_conf.get_cluster_config(model_params)

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
    else:
        params["entry_type"] = "evaluate_tf"
    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_tf_cmd(conf,
                         "file://" + os.path.join(cwd, JOB_ARCHIVE_FILE),
                         "file://" + os.path.join(cwd, PARAMS_FILE),
                         ENTRY_FILE, model_name, oss_model_path, data_table,
                         "", result_table, project)
    submit_pai_task(cmd, datasource)
    table_ops.drop_tables([data_table], datasource)