コード例 #1
0
def predict(datasource, select, data_table, result_table, label_column,
            oss_model_path):
    """PAI TensorFlow prediction wrapper
    This function do some preparation for the local prediction, say,
    download the model from OSS, extract metadata and so on.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        label_column: prediction label column
        oss_model_path: the model path on OSS
    """

    try:
        tf.enable_eager_execution()
    except:  # noqa: E722
        pass

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    feature_columns = eval(feature_columns_code)

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/model_save" % oss_model_path)
    else:
        oss.load_file(oss_model_path, "model_save")

    _predict(datasource=datasource,
             estimator_string=estimator,
             select=select,
             result_table=result_table,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_column_names_map=feature_column_names_map,
             train_label_name=label_meta["feature_name"],
             result_col_name=label_column,
             feature_metas=feature_metas,
             model_params=model_params,
             save="model_save",
             batch_size=1,
             pai_table=data_table)
コード例 #2
0
ファイル: explain.py プロジェクト: hsjung6/sqlflow
def explain_step(datasource, select, data_table, result_table, label_column,
                 oss_model_path):
    try:
        tf.enable_eager_execution()
    except Exception as e:
        sys.stderr.write("warning: failed to enable_eager_execution: %s" % e)
        pass

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    fc_map_ir = feature_columns_code
    feature_columns = compile_ir_feature_columns(fc_map_ir,
                                                 EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(fc_map_ir)
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = dict([(fd.name, fd.to_dict()) for fd in field_descs])

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    model_name = oss_model_path.split("/")[-1]
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/%s" % (oss_model_path, model_name))
    else:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))

    # (TODO: lhw) use oss to store result image
    _explain(datasource=datasource,
             estimator_string=estimator,
             select=select,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_metas=feature_metas,
             label_meta=label_meta,
             model_params=model_params,
             save="model_save",
             result_table=result_table,
             pai_table=data_table,
             oss_dest=None,
             oss_ak=None,
             oss_sk=None,
             oss_endpoint=None,
             oss_bucket_name=None)
コード例 #3
0
def load_oss_model(oss_model_dir, estimator):
    is_estimator = is_tf_estimator(estimator)
    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    if is_estimator:
        load_file(oss_model_dir, "exported_path")

    # NOTE(typhoonzero): directory "model_save" is hardcoded in
    # codegen/tensorflow/codegen.go
    load_dir(os.path.join(oss_model_dir, "model_save"))
コード例 #4
0
ファイル: evaluate.py プロジェクト: annisun2020/sqlflow
def evaluate(datasource, select, data_table, result_table, oss_model_path,
             metrics):
    """PAI Tensorflow evaluate wrapper
    This function do some preparation for the local evaluation, say,
    download the model from OSS, extract metadata and so on.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        oss_model_path: the model path on OSS
        metrics: metrics to evaluate
    """

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    feature_columns = eval(feature_columns_code)
    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/model_save" % oss_model_path)
    else:
        oss.load_file(oss_model_path, "model_save")

    _evaluate(datasource=datasource,
              estimator_string=estimator,
              select=select,
              result_table=result_table,
              feature_columns=feature_columns,
              feature_column_names=feature_column_names,
              feature_metas=feature_metas,
              label_meta=label_meta,
              model_params=model_params,
              validation_metrics=metrics,
              save="model_save",
              batch_size=1,
              validation_steps=None,
              verbose=0,
              is_pai=True,
              pai_table=data_table)
コード例 #5
0
ファイル: tensorflow.py プロジェクト: gkn1fexxx/sqlflow
def load_oss_model(oss_model_dir, estimator):
    set_oss_environs(FLAGS)

    is_estimator = is_tf_estimator(estimator)

    # Keras single node is using h5 format to save the model, no need to deal with export model format.
    # Keras distributed mode will use estimator, so this is also needed.
    if is_estimator:
        model.load_file(oss_model_dir, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in codegen/tensorflow/codegen.go
        model.load_dir(oss_model_dir + "/model_save")
    else:
        model.load_file(oss_model_dir, "model_save")
コード例 #6
0
ファイル: evaluate.py プロジェクト: annisun2020/sqlflow
def _evaluate(datasource,
              estimator_string,
              select,
              result_table,
              feature_columns,
              feature_column_names,
              feature_metas={},
              label_meta={},
              model_params={},
              validation_metrics=["Accuracy"],
              save="",
              batch_size=1,
              validation_steps=None,
              verbose=0,
              pai_table=""):
    estimator_cls = import_model(estimator_string)
    is_estimator = is_tf_estimator(estimator_cls)
    set_log_level(verbose, is_estimator)
    eval_dataset = get_dataset_fn(select,
                                  datasource,
                                  feature_column_names,
                                  feature_metas,
                                  label_meta,
                                  is_pai=True,
                                  pai_table=pai_table,
                                  batch_size=batch_size)

    model_params.update(feature_columns)
    if is_estimator:
        FLAGS = tf.app.flags.FLAGS
        model_params["model_dir"] = FLAGS.checkpointDir
        estimator = estimator_cls(**model_params)
        result_metrics = estimator_evaluate(estimator, eval_dataset,
                                            validation_metrics)
    else:
        keras_model = init_model_with_feature_column(estimator, model_params)
        keras_model_pkg = sys.modules[estimator_cls.__module__]
        result_metrics = keras_evaluate(keras_model, eval_dataset, save,
                                        keras_model_pkg, validation_metrics)

    if result_table:
        metric_name_list = ["loss"] + validation_metrics
        write_result_metrics(result_metrics,
                             metric_name_list,
                             result_table,
                             "pai_maxcompute",
                             None,
                             hdfs_namenode_addr="",
                             hive_location="",
                             hdfs_user="",
                             hdfs_pass="")
コード例 #7
0
ファイル: explain.py プロジェクト: hsjung6/sqlflow
def explain(datasource,
            estimator_string,
            select,
            feature_columns,
            feature_column_names,
            feature_metas={},
            label_meta={},
            model_params={},
            save="",
            pai_table="",
            plot_type='bar',
            result_table="",
            oss_dest=None,
            oss_ak=None,
            oss_sk=None,
            oss_endpoint=None,
            oss_bucket_name=None):
    estimator_cls = import_model(estimator_string)
    if is_tf_estimator(estimator_cls):
        model_params['model_dir'] = save
    model_params.update(feature_columns)
    pop_optimizer_and_loss(model_params)

    def _input_fn():
        dataset = input_fn(select, datasource, feature_column_names,
                           feature_metas, label_meta)
        return dataset.batch(1).cache()

    estimator = init_model_with_feature_column(estimator_cls, model_params)
    conn = connect_with_data_source(datasource)

    if estimator_cls in (tf.estimator.BoostedTreesClassifier,
                         tf.estimator.BoostedTreesRegressor):
        explain_boosted_trees(datasource, estimator, _input_fn, plot_type,
                              result_table, feature_column_names, conn,
                              oss_dest, oss_ak, oss_sk, oss_endpoint,
                              oss_bucket_name)
    else:
        shap_dataset = pd.DataFrame(columns=feature_column_names)
        for i, (features, label) in enumerate(_input_fn()):
            shap_dataset.loc[i] = [
                item.numpy()[0][0] for item in features.values()
            ]
        explain_dnns(datasource, estimator, shap_dataset, plot_type,
                     result_table, feature_column_names, conn, oss_dest,
                     oss_ak, oss_sk, oss_endpoint, oss_bucket_name)

    conn.close()
コード例 #8
0
def _predict(datasource,
             estimator_string,
             select,
             result_table,
             feature_columns,
             feature_column_names,
             feature_column_names_map,
             train_label_name,
             result_col_name,
             feature_metas={},
             model_params={},
             save="",
             batch_size=1,
             pai_table=""):
    estimator = import_model(estimator_string)
    model_params.update(feature_columns)
    is_estimator = is_tf_estimator(estimator)

    conn = PaiIOConnection.from_table(pai_table)
    selected_cols = db.selected_cols(conn, None)
    predict_generator = db.db_generator(conn, None)

    pop_optimizer_and_loss(model_params)

    if not is_estimator:
        if not issubclass(estimator, tf.keras.Model):
            # functional model need field_metas parameter
            model_params["field_metas"] = feature_metas
        print("Start predicting using keras model...")
        keras_predict(estimator, model_params, save, result_table,
                      feature_column_names, feature_metas, train_label_name,
                      result_col_name, conn, predict_generator, selected_cols)
    else:
        model_params['model_dir'] = save
        print("Start predicting using estimator model...")
        estimator_predict(result_table, feature_column_names, feature_metas,
                          train_label_name, result_col_name, conn,
                          predict_generator, selected_cols)

    print("Done predicting. Predict table : %s" % result_table)
コード例 #9
0
ファイル: tensorflow.py プロジェクト: gkn1fexxx/sqlflow
def save_oss_model(oss_model_dir, estimator, num_workers, feature_column_names,
                   feature_column_names_map, feature_metas, label_meta,
                   model_params, feature_columns_code):
    is_estimator = is_tf_estimator(estimator)

    # Keras single node is using h5 format to save the model, no need to deal with export model format.
    # Keras distributed mode will use estimator, so this is also needed.
    if is_estimator:
        if FLAGS.task_index == 0:
            with open("exported_path", "r") as fn:
                saved_model_path = fn.read()
            model.save_dir(oss_model_dir, saved_model_path)
            model.save_file(oss_model_dir, "exported_path")
    else:
        if len(FLAGS.worker_hosts.split(",")) > 1:
            if FLAGS.task_index == 0:
                model.save_file(oss_model_dir, "exported_path")
        else:
            model.save_file(oss_model_dir, "model_save")

    model.save_metas(oss_model_dir, num_workers, "tensorflow_model_desc",
                     estimator, feature_column_names, feature_column_names_map,
                     feature_metas, label_meta, model_params,
                     feature_columns_code)
コード例 #10
0
def explain(datasource,
            estimator_string,
            select,
            feature_columns,
            feature_column_names,
            feature_metas={},
            label_meta={},
            model_params={},
            save="",
            pai_table="",
            plot_type='bar',
            result_table="",
            oss_dest=None,
            oss_ak=None,
            oss_sk=None,
            oss_endpoint=None,
            oss_bucket_name=None):
    estimator_cls = import_model(estimator_string)
    is_pai = True if pai_table else False
    if is_pai:
        FLAGS = tf.app.flags.FLAGS
        model_params["model_dir"] = FLAGS.checkpointDir
        select = ""
    else:
        if is_tf_estimator(estimator_cls):
            model_params['model_dir'] = save
    model_params.update(feature_columns)
    pop_optimizer_and_loss(model_params)

    def _input_fn():
        dataset = input_fn(select,
                           datasource,
                           feature_column_names,
                           feature_metas,
                           label_meta,
                           is_pai=is_pai,
                           pai_table=pai_table)
        return dataset.batch(1).cache()

    estimator = init_model_with_feature_column(estimator_cls, model_params)
    if not is_tf_estimator(estimator_cls):
        load_keras_model_weights(estimator, save)

    if is_pai:
        conn = PaiIOConnection.from_table(
            result_table) if result_table else None
    else:
        conn = connect_with_data_source(datasource)

    if estimator_cls in (tf.estimator.BoostedTreesClassifier,
                         tf.estimator.BoostedTreesRegressor):
        explain_boosted_trees(datasource, estimator, _input_fn, plot_type,
                              result_table, feature_column_names, conn,
                              oss_dest, oss_ak, oss_sk, oss_endpoint,
                              oss_bucket_name)
    else:
        shap_dataset = pd.DataFrame(columns=feature_column_names)
        for i, (features, label) in enumerate(_input_fn()):
            shap_dataset.loc[i] = [
                item.numpy()[0][0] for item in features.values()
            ]
        explain_dnns(datasource, estimator, shap_dataset, plot_type,
                     result_table, feature_column_names, conn, oss_dest,
                     oss_ak, oss_sk, oss_endpoint, oss_bucket_name)

    if conn is not None:
        conn.close()
コード例 #11
0
ファイル: predict.py プロジェクト: vmnet04/sqlflow
def _predict(datasource,
             estimator_string,
             select,
             result_table,
             feature_columns,
             feature_column_names,
             feature_column_names_map,
             train_label_name,
             result_col_name,
             feature_metas={},
             model_params={},
             save="",
             batch_size=1,
             pai_table=""):
    estimator = import_model(estimator_string)
    model_params.update(feature_columns)
    is_estimator = is_tf_estimator(estimator)

    conn = None
    driver = "pai_maxcompute"
    pai_table_parts = pai_table.split(".")
    formatted_pai_table = "odps://%s/tables/%s" % (pai_table_parts[0],
                                                   pai_table_parts[1])
    selected_cols = db.pai_selected_cols(formatted_pai_table)
    predict_generator = db.pai_maxcompute_db_generator(formatted_pai_table)

    if not is_estimator:
        if not issubclass(estimator, tf.keras.Model):
            # functional model need field_metas parameter
            model_params["field_metas"] = feature_metas
        print("Start predicting using keras model...")
        keras_predict(estimator,
                      model_params,
                      save,
                      result_table,
                      feature_column_names,
                      feature_metas,
                      train_label_name,
                      result_col_name,
                      driver,
                      conn,
                      predict_generator,
                      selected_cols,
                      hdfs_namenode_addr="",
                      hive_location="",
                      hdfs_user="",
                      hdfs_pass="")
    else:
        model_params['model_dir'] = save
        print("Start predicting using estimator model...")
        estimator_predict(estimator,
                          model_params,
                          save,
                          result_table,
                          feature_column_names,
                          feature_column_names_map,
                          feature_columns,
                          feature_metas,
                          train_label_name,
                          result_col_name,
                          driver,
                          conn,
                          predict_generator,
                          selected_cols,
                          hdfs_namenode_addr="",
                          hive_location="",
                          hdfs_user="",
                          hdfs_pass="")

    print("Done predicting. Predict table : %s" % result_table)