Example #1
def test_get_datasource_dsn(self):
    ds = "odps://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
    expected_dsn = "access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
    dsn = submitter.get_datasource_dsn(ds)
    self.assertEqual(expected_dsn, dsn)
    project = "test_ci"
    self.assertEqual(project, submitter.get_project(ds))
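For reference, here is a minimal sketch of the behavior this test pins down (the real SQLFlow helpers may differ in detail): get_datasource_dsn strips the "odps://" scheme prefix, and get_project reads the curr_project query parameter.

from urllib.parse import parse_qs, urlparse


def get_datasource_dsn(datasource):
    # Strip the "odps://" scheme prefix, keeping "user:key@host/path?query"
    return datasource.split("://", 1)[1]


def get_project(datasource):
    # The ODPS project name is carried in the curr_project query parameter
    return parse_qs(urlparse(datasource).query)["curr_project"][0]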
Example #2
def submit_alisa_explain(datasource, select, result_table, model_name,
                         model_params):
    """This function pack need params and resource to a tarball
    and submit a explain task to PAI through Alisa

    Args:
        datasource: current datasource
        select: sql statement to get the explain data set
        result_table: the table name to save the result
        model_name: the model to explain
        model_params: dict, params for training, corresponding to WITH clause
    """
    params = dict(locals())

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    # TODO(typhoonzero): Do **NOT** create tmp table when the select
    # statement is like: "SELECT fields,... FROM table"
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format the result_table name as "db.table" so that the codegen
    # can form a submit argument of the form
    # "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    model_type, estimator = get_saved_model_type_and_estimator(
        datasource, model_name)
    params["oss_model_path"] = oss_model_path

    label_column = model_params.get("label_col")
    params["label_column"] = label_column
    # FIXME(typhoonzero): Add this back using runtime.step.create_result_table
    # create_explain_result_table(datasource, data_table, result_table,
    #                             model_type, estimator, label_column)

    setup_explain_entry(params, model_type)
    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_explain_cmd(datasource, project, oss_model_path, model_name,
                              data_table, result_table, model_type,
                              model_params, "file://@@%s" % JOB_ARCHIVE_FILE,
                              "file://@@%s" % PARAMS_FILE, label_column, cwd)
    upload_resource_and_submit_alisa_task(
        datasource, "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
        "file://" + path.join(cwd, PARAMS_FILE), cmd)
    drop_tables([data_table], datasource)
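A hypothetical invocation, assuming a working ODPS/Alisa setup; the datasource string, table names, and model name below are illustrative, not real credentials. Note that label_col is the one key this function reads from model_params directly.

ds = "odps://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
submit_alisa_explain(
    datasource=ds,
    select="SELECT * FROM iris.train",
    result_table="my_explain_result",  # becomes "test_ci.my_explain_result"
    model_name="my_dnn_model",
    model_params={"label_col": "class"})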
Example #3
def submit_alisa_predict(datasource, select, result_table, label_column,
                         model_name, model_params):
    """This function pack needed params and resource to a tarball
    and submit a prediction task to PAI throught Alisa

    Args:
        datasource: current datasource
        select: sql statement to get the prediction data set
        result_table: the table name to save the result
        label_column: name of the label column, if it does not exist in select
        model_name: the model used to do the prediction
        model_params: dict, params for training, corresponding to WITH clause
    """
    params = dict(locals())

    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")
    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    # format the result_table name as "db.table" so that the
    # codegen can form a submit argument of the form
    # "odps://project/tables/table_name"
    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)

    oss_model_path = get_oss_model_save_path(datasource, model_name)
    params["oss_model_path"] = oss_model_path
    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    setup_predict_entry(params, model_type)

    # TODO(lhw): get train label column from model meta
    create_predict_result_table(datasource, data_table, result_table,
                                label_column, None, model_type)

    prepare_archive(cwd, estimator, oss_model_path, params)

    cmd = get_pai_predict_cmd(datasource, project, oss_model_path, model_name,
                              data_table, result_table, model_type,
                              model_params, "file://@@%s" % JOB_ARCHIVE_FILE,
                              "file://@@%s" % PARAMS_FILE, cwd)

    upload_resource_and_submit_alisa_task(
        datasource, "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
        "file://" + path.join(cwd, PARAMS_FILE), cmd)

    drop_tables([data_table], datasource)
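A hypothetical invocation mirroring the explain example above; all identifiers are illustrative:

ds = "odps://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
submit_alisa_predict(
    datasource=ds,
    select="SELECT * FROM iris.test",
    result_table="my_predict_result",
    label_column="class",
    model_name="my_dnn_model",
    model_params={})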
Example #4
def submit_alisa_evaluate(datasource, model_name, select, result_table,
                          model_attrs):
    """Submit a PAI evaluation task through Alisa

    Args:
        datasource: current datasource
        model_name: the model used to do the evaluation
        select: sql statement to get the evaluation data set
        result_table: the table name to save the result
        model_attrs: dict, params for training, corresponding to WITH clause
    """

    params = dict(locals())
    cwd = tempfile.mkdtemp(prefix="sqlflow", dir="/tmp")

    project = get_project(datasource)
    if result_table.count(".") == 0:
        result_table = "%s.%s" % (project, result_table)
    oss_model_path = get_oss_model_save_path(datasource, model_name)
    params["oss_model_path"] = oss_model_path

    model_type, estimator = get_oss_saved_model_type_and_estimator(
        oss_model_path, project)
    if model_type == EstimatorType.PAIML:
        raise SQLFlowDiagnostic("PAI model evaluation is not supported yet.")

    data_table = create_tmp_table_from_select(select, datasource)
    params["data_table"] = data_table

    metrics = get_evaluate_metrics(model_type, model_attrs)
    params["metrics"] = metrics
    create_evaluate_result_table(datasource, result_table, metrics)

    conf = cluster_conf.get_cluster_config(model_attrs)

    if model_type == EstimatorType.XGBOOST:
        params["entry_type"] = "evaluate_xgb"
    else:
        params["entry_type"] = "evaluate_tf"
    prepare_archive(cwd, estimator, oss_model_path, params)
    cmd = get_pai_tf_cmd(conf, "file://@@%s" % JOB_ARCHIVE_FILE,
                         "file://@@%s" % PARAMS_FILE, ENTRY_FILE, model_name,
                         oss_model_path, data_table, "", result_table, project)
    upload_resource_and_submit_alisa_task(
        datasource, "file://" + path.join(cwd, JOB_ARCHIVE_FILE),
        "file://" + path.join(cwd, PARAMS_FILE), cmd)
    drop_tables([data_table], datasource)
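A hypothetical invocation; note this function raises SQLFlowDiagnostic for PAIML models. All identifiers are illustrative, and the validation.metrics key is an assumption about what get_evaluate_metrics reads from model_attrs, not a confirmed API:

ds = "odps://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http"
submit_alisa_evaluate(
    datasource=ds,
    model_name="my_dnn_model",
    select="SELECT * FROM iris.test",
    result_table="my_evaluate_result",
    model_attrs={"validation.metrics": "Accuracy"})  # assumed metrics key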