Esempio n. 1
0
def predict_step(datasource, select, data_table, result_table, label_column,
                 oss_model_path):
    """PAI TensorFlow prediction wrapper.

    Prepares a local prediction run and then delegates to ``_predict``:
    loads the model metadata from OSS, rebuilds the feature columns from
    their IR, and downloads the saved model files.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        label_column: prediction label column
        oss_model_path: the model path on OSS
    """
    import sys

    # Enabling eager execution is best effort: a failure here must not
    # abort the prediction, but it is reported instead of being silently
    # swallowed. Only ``Exception`` is caught so that KeyboardInterrupt
    # and SystemExit still propagate.
    try:
        tf.enable_eager_execution()
    except Exception as e:
        sys.stderr.write(
            "warning: failed to enable_eager_execution: %s\n" % e)

    # The feature column names and feature metas stored in the model
    # description are not used: both are rebuilt below from the feature
    # column IR, so they are discarded here.
    (estimator, _, feature_column_names_map, _, label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    fc_map_ir = feature_columns_code
    feature_columns = compile_ir_feature_columns(fc_map_ir,
                                                 EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(fc_map_ir)
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = {fd.name: fd.to_dict() for fd in field_descs}

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    # The local model directory is named after the last component of the
    # OSS model path.
    model_local_dir = oss_model_path.split("/")[-1]
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/%s" % (oss_model_path, model_local_dir))
    else:
        # NOTE(review): this branch downloads "model_save" while _predict
        # below is given save=model_local_dir -- confirm the two agree.
        oss.load_dir(os.path.join(oss_model_path, "model_save"))

    _predict(datasource=datasource,
             estimator_string=estimator,
             select=select,
             result_table=result_table,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_column_names_map=feature_column_names_map,
             train_label_name=label_meta["feature_name"],
             result_col_name=label_column,
             feature_metas=feature_metas,
             model_params=model_params,
             save=model_local_dir,
             batch_size=1,
             pai_table=data_table)
Esempio n. 2
0
def evaluate_step(datasource, select, data_table, result_table, oss_model_path,
                  metrics):
    """PAI TensorFlow evaluation wrapper.

    Gets everything ready for a local evaluation and hands over to
    ``_evaluate``: the model description is read from OSS, the feature
    columns are compiled from their IR and the saved model is downloaded.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save the evaluation result
        oss_model_path: the model path on OSS
        metrics: metrics to evaluate
    """

    # Only part of the stored description is consumed here; the feature
    # names and metas are derived from the feature column IR instead.
    (estimator, _, _, _, label_meta, model_params,
     fc_ir) = oss.load_metas(oss_model_path, "tensorflow_model_desc")

    feature_columns = compile_ir_feature_columns(fc_ir,
                                                 EstimatorType.TENSORFLOW)
    ordered_descs = get_ordered_field_descs(fc_ir)
    feature_column_names = [desc.name for desc in ordered_descs]
    feature_metas = {desc.name: desc.to_dict() for desc in ordered_descs}

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because they are not needed at this stage.

    uses_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node stores the model in h5 format, so the exported
    # model handling is only required for estimators (which Keras
    # distributed mode uses as well).
    last_component = oss_model_path.rsplit("/", 1)[-1]
    if not uses_estimator:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))
    else:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        # NOTE(review): the directory fetched here is named after the last
        # path component while save="model_save" is passed below --
        # confirm the two agree.
        oss.load_dir("/".join((oss_model_path, last_component)))

    _evaluate(datasource=datasource,
              estimator_string=estimator,
              select=select,
              result_table=result_table,
              feature_columns=feature_columns,
              feature_column_names=feature_column_names,
              feature_metas=feature_metas,
              label_meta=label_meta,
              model_params=model_params,
              validation_metrics=metrics,
              save="model_save",
              batch_size=1,
              validation_steps=None,
              verbose=0,
              pai_table=data_table)
Esempio n. 3
0
def predict(datasource, select, data_table, result_table, label_column,
            oss_model_path):
    """PAI TensorFlow prediction wrapper.

    Prepares a local prediction run and then delegates to ``_predict``:
    loads the model metadata from OSS, evaluates the stored feature column
    code, and downloads the saved model files.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        label_column: prediction label column
        oss_model_path: the model path on OSS
    """
    import sys

    # Enabling eager execution is best effort: a failure here must not
    # abort the prediction, but it is reported instead of being silently
    # swallowed. Only ``Exception`` is caught so that KeyboardInterrupt
    # and SystemExit still propagate.
    try:
        tf.enable_eager_execution()
    except Exception as e:
        sys.stderr.write(
            "warning: failed to enable_eager_execution: %s\n" % e)

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    # SECURITY NOTE(review): eval() executes whatever code string is stored
    # in the model description on OSS. This is only safe while that
    # metadata is trusted; anyone able to write to the model path can run
    # arbitrary code here. Consider moving to a declarative representation.
    feature_columns = eval(feature_columns_code)

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/model_save" % oss_model_path)
    else:
        # Non-estimator models are fetched as a single "model_save" file.
        oss.load_file(oss_model_path, "model_save")

    _predict(datasource=datasource,
             estimator_string=estimator,
             select=select,
             result_table=result_table,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_column_names_map=feature_column_names_map,
             train_label_name=label_meta["feature_name"],
             result_col_name=label_column,
             feature_metas=feature_metas,
             model_params=model_params,
             save="model_save",
             batch_size=1,
             pai_table=data_table)
Esempio n. 4
0
    def save_to_db(self,
                   datasource,
                   table,
                   local_dir=None,
                   oss_model_dir=None):
        """
        This save function would archive all the files on local_dir
        into a tarball, and save it into DBMS with the specified table
        name.

        Args:
            datasource (str): the connection string to DBMS.
            table (str): the saved table name. If it contains no ".",
                it is prefixed with the connection's "database"
                parameter.
            local_dir (str): the local directory to save. Defaults to
                the current working directory.
            oss_model_dir (str): optional OSS model directory. When
                given, ``oss.load_dir`` is called on it with
                ``local_dir`` as the working directory before the
                archive is built.

        Returns:
            str: the (possibly database-qualified) table name the
            archive was written to.
        """
        if local_dir is None:
            local_dir = os.getcwd()

        conn = connect_with_data_source(datasource)
        # The connection is closed in all cases, including when any of
        # the steps below raises.
        try:
            if oss_model_dir:
                cur_dir = os.getcwd()
                os.chdir(local_dir)
                try:
                    oss.load_dir(oss_model_dir)
                finally:
                    # Always restore the caller's working directory,
                    # even if the download fails.
                    os.chdir(cur_dir)

            if "." not in table:
                project_name = conn.param("database")
                table = project_name + "." + table

            with temp_file.TemporaryDirectory() as tmp_dir:
                tarball = os.path.join(tmp_dir, TARBALL_NAME)
                self._zip(local_dir, tarball)

                def _bytes_reader(filename, buf_size=8 * 32):
                    # Returns a zero-argument generator function that
                    # yields the file content in buf_size-byte chunks.
                    def _gen():
                        with open(filename, "rb") as f:
                            # read() returns b"" at end of file, which
                            # is the sentinel that stops the iteration.
                            for data in iter(lambda: f.read(buf_size), b""):
                                yield data

                    return _gen

                # The write must happen while the temporary directory
                # (and therefore the tarball) still exists.
                write_with_generator_and_metadata(datasource, table,
                                                  _bytes_reader(tarball),
                                                  self._to_dict())

            conn.persist_table(table)
        finally:
            conn.close()
        return table
Esempio n. 5
0
def explain_step(datasource, select, data_table, result_table, label_column,
                 oss_model_path):
    """PAI TensorFlow explanation wrapper.

    Gets everything ready for a local model explanation and hands over to
    ``_explain``: the model description is read from OSS, the feature
    columns are compiled from their IR and the saved model is downloaded.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save the explanation result
        label_column: label column (accepted but not used in this function)
        oss_model_path: the model path on OSS
    """
    # Eager execution is enabled on a best-effort basis; a failure is only
    # reported on stderr.
    try:
        tf.enable_eager_execution()
    except Exception as err:
        sys.stderr.write("warning: failed to enable_eager_execution: %s" % err)

    # Only part of the stored description is consumed here; the feature
    # names and metas are derived from the feature column IR instead.
    (estimator, _, _, _, label_meta, model_params,
     fc_ir) = oss.load_metas(oss_model_path, "tensorflow_model_desc")

    feature_columns = compile_ir_feature_columns(fc_ir,
                                                 EstimatorType.TENSORFLOW)
    ordered_descs = get_ordered_field_descs(fc_ir)
    feature_column_names = [desc.name for desc in ordered_descs]
    feature_metas = {desc.name: desc.to_dict() for desc in ordered_descs}

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because they are not needed at this stage.

    uses_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node stores the model in h5 format, so the exported
    # model handling is only required for estimators (which Keras
    # distributed mode uses as well).
    last_component = oss_model_path.rsplit("/", 1)[-1]
    if not uses_estimator:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))
    else:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        # NOTE(review): the directory fetched here is named after the last
        # path component while save="model_save" is passed below --
        # confirm the two agree.
        oss.load_dir("/".join((oss_model_path, last_component)))

    # (TODO: lhw) use oss to store result image
    _explain(datasource=datasource,
             estimator_string=estimator,
             select=select,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_metas=feature_metas,
             label_meta=label_meta,
             model_params=model_params,
             save="model_save",
             result_table=result_table,
             pai_table=data_table,
             oss_dest=None,
             oss_ak=None,
             oss_sk=None,
             oss_endpoint=None,
             oss_bucket_name=None)