def test_get_datasource_dsn(self):
    ds = "odps://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
    expected_dsn = "access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
    dsn = submitter.get_datasource_dsn(ds)
    self.assertEqual(expected_dsn, dsn)
    project = "test_ci"
    self.assertEqual(project, submitter.get_project(ds))

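# Illustrative sketch only, not part of the tested module: the assertions above
# imply that get_datasource_dsn strips the "odps://" scheme prefix and that
# get_project reads the ``curr_project`` query argument from the DSN. The
# hypothetical helpers below show one way to do both with the standard library;
# the real implementations may differ.
def _example_get_datasource_dsn(datasource):
    # "odps://u:p@host/api?curr_project=x" -> "u:p@host/api?curr_project=x"
    return datasource.split("://", 1)[1]


def _example_get_project(datasource):
    from urllib.parse import parse_qs, urlparse

    # "...?curr_project=test_ci&scheme=http" -> "test_ci"
    return parse_qs(urlparse(datasource).query)["curr_project"][0]
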
def submit_alisa_explain(datasource, select, result_table, model_name,
                         model_params):
    """This function packs the needed params and resources into a tarball and
    submits an explain task to PAI through Alisa

    Args:
        datasource: current datasource
        select: sql statement to get the explain data set
        result_table: the table name to save the result
        model_name: model used to do the explanation
        model_params: dict, params for the task, corresponding to the WITH clause
    """
    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    # TODO(typhoonzero): Do **NOT** create tmp table when the select
    # statement is like: "SELECT fields,... FROM table"
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format resultTable name to "db.table" to let the codegen
    # form a submitting argument of format
    # "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    model_type, estimator = get_saved_model_type_and_estimator(
        datasource, model_name)
    params["oss_model_path"] = oss_model_path

    label_column = model_params.get("label_col")
    params["label_column"] = label_column
    # FIXME(typhoonzero): Add this back using runtime.step.create_result_table
    # create_explain_result_table(datasource, data_table, result_table,
    #                             model_type, estimator, label_column)

    setup_explain_entry(params, model_type)
    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_explain_cmd(datasource, project, oss_model_path, model_name,
                              data_table, result_table, model_type,
                              model_params,
                              "file://@@%s" % JOB_ARCHIVE_FILE,
                              "file://@@%s" % PARAMS_FILE, label_column, cwd)

    upload_resource_and_submit_alisa_task(
        datasource, "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
        "file://" + path.join(cwd, PARAMS_FILE), cmd)
    drop_tables([data_table], datasource)

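# Illustrative sketch only: as the comment in submit_alisa_explain notes, the
# "db.table" result table name is later rendered by the codegen as a submitting
# argument of the form "odps://project/tables/table_name". The helper below is
# hypothetical and only demonstrates that formatting.
def _example_odps_table_uri(result_table):
    # "test_ci.explain_result" -> "odps://test_ci/tables/explain_result"
    project, table = result_table.split(".", 1)
    return "odps://%s/tables/%s" % (project, table)
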
def submit_alisa_predict(datasource, select, result_table, label_column,
                         model_name, model_params):
    """This function packs the needed params and resources into a tarball and
    submits a prediction task to PAI through Alisa

    Args:
        datasource: current datasource
        select: sql statement to get the prediction data set
        result_table: the table name to save the result
        label_column: name of the label column, if it does not exist in select
        model_name: model used to do the prediction
        model_params: dict, params for the task, corresponding to the WITH clause
    """
    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format resultTable name to "db.table" to let the
    # codegen form a submitting argument of format
    # "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    setup_predict_entry(params, model_type)

    # TODO(lhw): get train label column from model meta
    create_predict_result_table(datasource, data_table, result_table,
                                label_column, None, model_type)

    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_predict_cmd(datasource, project, oss_model_path, model_name,
                              data_table, result_table, model_type,
                              model_params,
                              "file://@@%s" % JOB_ARCHIVE_FILE,
                              "file://@@%s" % PARAMS_FILE, cwd)

    upload_resource_and_submit_alisa_task(
        datasource, "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
        "file://" + path.join(cwd, PARAMS_FILE), cmd)
    drop_tables([data_table], datasource)

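# Illustrative sketch only: a hypothetical invocation of submit_alisa_predict.
# The select statement, table, and model names below are made-up examples, not
# values taken from the module.
def _example_submit_predict(datasource):
    submit_alisa_predict(datasource=datasource,
                         select="SELECT * FROM iris.test",
                         result_table="predict_result",
                         label_column="class",
                         model_name="my_dnn_model",
                         model_params={})
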
def submit_alisa_evaluate(datasource, model_name, select, result_table,
                          model_attrs):
    """Submit a PAI evaluation task through Alisa

    Args:
        datasource: current datasource
        model_name: model used to do the evaluation
        select: sql statement to get the evaluation data set
        result_table: the table name to save the result
        model_attrs: dict, params for the task, corresponding to the WITH clause
    """
    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    metrics = get_evaluate_metrics(model_type, model_attrs)
    params["metrics"] = metrics
    create_evaluate_result_table(datasource, result_table, metrics)

    conf = cluster_conf.get_cluster_config(model_attrs)
    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
    else:
        params["entry_type"] = "evaluate_tf"
    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_tf_cmd(conf, "file://@@%s" % JOB_ARCHIVE_FILE,
                         "file://@@%s" % PARAMS_FILE, ENTRY_FILE, model_name,
                         oss_model_path, data_table, "", result_table, project)

    upload_resource_and_submit_alisa_task(
        datasource, "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
        "file://" + path.join(cwd, PARAMS_FILE), cmd)
    drop_tables([data_table], datasource)

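# Illustrative sketch only: a hypothetical invocation of submit_alisa_evaluate,
# mainly to highlight that its signature puts model_name before select and
# takes model_attrs rather than model_params. All names below are made-up
# examples, not values taken from the module.
def _example_submit_evaluate(datasource):
    submit_alisa_evaluate(datasource=datasource,
                          model_name="my_dnn_model",
                          select="SELECT * FROM iris.test",
                          result_table="evaluate_result",
                          model_attrs={})
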