def predict_step(datasource, select, data_table, result_table, label_column,
                 oss_model_path):
    """PAI TensorFlow prediction wrapper

    This function does some preparation for local prediction, e.g.
    downloading the model from OSS and extracting its metadata.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        label_column: prediction label column
        oss_model_path: the model path on OSS
    """
    try:
        tf.enable_eager_execution()
    except:  # noqa: E722
        pass

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")
    fc_map_ir = feature_columns_code
    feature_columns = compile_ir_feature_columns(fc_map_ir,
                                                 EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(fc_map_ir)
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = dict([(fd.name, fd.to_dict()) for fd in field_descs])

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single-node mode saves the model in h5 format, so there is no
    # need to deal with the exported model format. Keras distributed mode
    # uses an estimator, so this branch is also needed.
    model_local_dir = oss_model_path.split("/")[-1]
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/%s" % (oss_model_path, model_local_dir))
    else:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))

    _predict(datasource=datasource,
             estimator_string=estimator,
             select=select,
             result_table=result_table,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_column_names_map=feature_column_names_map,
             train_label_name=label_meta["feature_name"],
             result_col_name=label_column,
             feature_metas=feature_metas,
             model_params=model_params,
             save=model_local_dir,
             batch_size=1,
             pai_table=data_table)
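# A minimal usage sketch of predict_step (illustration only, not part of the
# original module). The datasource string, table names, and OSS path are all
# hypothetical placeholders for a MaxCompute project.
predict_step(
    datasource="maxcompute://ak:sk@service.example.com/api?curr_project=my_project",
    select="SELECT * FROM my_project.iris_test",
    data_table="my_project.tmp_predict_input",
    result_table="my_project.iris_predict_result",
    label_column="class",
    oss_model_path="oss://sqlflow-models/my_project/my_dnn_model")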
def evaluate_step(datasource, select, data_table, result_table,
                  oss_model_path, metrics):
    """PAI TensorFlow evaluate wrapper

    This function does some preparation for local evaluation, e.g.
    downloading the model from OSS and extracting its metadata.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save evaluation result
        oss_model_path: the model path on OSS
        metrics: metrics to evaluate
    """
    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")
    fc_map_ir = feature_columns_code
    feature_columns = compile_ir_feature_columns(fc_map_ir,
                                                 EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(fc_map_ir)
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = dict([(fd.name, fd.to_dict()) for fd in field_descs])

    # NOTE(typhoonzero): no need to eval model_params["optimizer"] and
    # model_params["loss"] because evaluation does not need these parameters.
    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single-node mode saves the model in h5 format, so there is no
    # need to deal with the exported model format. Keras distributed mode
    # uses an estimator, so this branch is also needed.
    model_name = oss_model_path.split("/")[-1]
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/%s" % (oss_model_path, model_name))
    else:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))

    _evaluate(datasource=datasource,
              estimator_string=estimator,
              select=select,
              result_table=result_table,
              feature_columns=feature_columns,
              feature_column_names=feature_column_names,
              feature_metas=feature_metas,
              label_meta=label_meta,
              model_params=model_params,
              validation_metrics=metrics,
              save="model_save",
              batch_size=1,
              validation_steps=None,
              verbose=0,
              pai_table=data_table)
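# evaluate_step is invoked the same way as predict_step in the sketch above;
# the only extra argument is `metrics`, a list of metric names understood by
# the underlying Keras/estimator evaluation, e.g. (hypothetical values):
#
#     evaluate_step(datasource, select, data_table,
#                   "my_project.iris_eval_result",
#                   "oss://sqlflow-models/my_project/my_dnn_model",
#                   metrics=["Accuracy"])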
def predict(datasource, select, data_table, result_table, label_column,
            oss_model_path):
    """PAI TensorFlow prediction wrapper

    This function does some preparation for local prediction, e.g.
    downloading the model from OSS and extracting its metadata.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        label_column: prediction label column
        oss_model_path: the model path on OSS
    """
    try:
        tf.enable_eager_execution()
    except:  # noqa: E722
        pass

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    feature_columns = eval(feature_columns_code)
    # NOTE(typhoonzero): no need to eval model_params["optimizer"] and
    # model_params["loss"] because prediction does not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single-node mode saves the model in h5 format, so there is no
    # need to deal with the exported model format. Keras distributed mode
    # uses an estimator, so this branch is also needed.
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/model_save" % oss_model_path)
    else:
        oss.load_file(oss_model_path, "model_save")

    _predict(datasource=datasource,
             estimator_string=estimator,
             select=select,
             result_table=result_table,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_column_names_map=feature_column_names_map,
             train_label_name=label_meta["feature_name"],
             result_col_name=label_column,
             feature_metas=feature_metas,
             model_params=model_params,
             save="model_save",
             batch_size=1,
             pai_table=data_table)
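# Hypothetical illustration of the eval() path above (not part of the
# original module): in this legacy wrapper the "tensorflow_model_desc"
# metadata stores the feature columns as Python source text, and eval()
# rebuilds live tf.feature_column objects from it. The exact stored string
# below is an assumption for illustration.
import tensorflow as tf  # must be importable in eval()'s namespace

feature_columns_code = (
    '{"feature_columns": '
    '[tf.feature_column.numeric_column("sepal_length", shape=[1])]}')
feature_columns = eval(feature_columns_code)  # -> dict with live objects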
def save_to_db(self, datasource, table, local_dir=None, oss_model_dir=None):
    """Archive all files under local_dir into a tarball and save it into
    the DBMS under the specified table name.

    Args:
        datasource (str): the connection string to the DBMS.
        table (str): the table name to save to.
        local_dir (str): the local directory to archive; defaults to the
            current working directory.
        oss_model_dir (str): if set, download the model files from this
            OSS directory into local_dir before archiving.

    Returns:
        str: the saved table name, qualified with the project name if the
            given name was not already qualified.
    """
    if local_dir is None:
        local_dir = os.getcwd()

    conn = connect_with_data_source(datasource)

    if oss_model_dir:
        cur_dir = os.getcwd()
        os.chdir(local_dir)
        oss.load_dir(oss_model_dir)
        os.chdir(cur_dir)

    if "." not in table:
        project_name = conn.param("database")
        table = project_name + "." + table

    with temp_file.TemporaryDirectory() as tmp_dir:
        tarball = os.path.join(tmp_dir, TARBALL_NAME)
        self._zip(local_dir, tarball)

        def _bytes_reader(filename, buf_size=8 * 32):
            def _gen():
                with open(filename, "rb") as f:
                    while True:
                        data = f.read(buf_size)
                        if data:
                            yield data
                        else:
                            break

            return _gen

        write_with_generator_and_metadata(datasource, table,
                                          _bytes_reader(tarball),
                                          self._to_dict())
    conn.persist_table(table)
    conn.close()
    return table
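# Standalone sketch of the _bytes_reader pattern used in save_to_db above:
# a zero-argument generator factory that streams a file in fixed-size
# chunks, so the writer can obtain a fresh stream (e.g. to retry an upload)
# simply by calling the factory again. Self-contained; reads this very file.
def chunked_reader(filename, buf_size=8 * 32):
    def _gen():
        with open(filename, "rb") as f:
            while True:
                data = f.read(buf_size)
                if not data:
                    break
                yield data

    return _gen


reader = chunked_reader(__file__)
# Each call to reader() yields a fresh generator over the same bytes.
assert sum(len(c) for c in reader()) == sum(len(c) for c in reader())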
def explain_step(datasource, select, data_table, result_table, label_column,
                 oss_model_path):
    """PAI TensorFlow explain wrapper

    This function does some preparation for local explanation, e.g.
    downloading the model from OSS and extracting its metadata.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save explain result
        label_column: the label column name
        oss_model_path: the model path on OSS
    """
    try:
        tf.enable_eager_execution()
    except Exception as e:
        sys.stderr.write("warning: failed to enable_eager_execution: %s" % e)

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")
    fc_map_ir = feature_columns_code
    feature_columns = compile_ir_feature_columns(fc_map_ir,
                                                 EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(fc_map_ir)
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = dict([(fd.name, fd.to_dict()) for fd in field_descs])

    # NOTE(typhoonzero): no need to eval model_params["optimizer"] and
    # model_params["loss"] because explaining does not need these parameters.
    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single-node mode saves the model in h5 format, so there is no
    # need to deal with the exported model format. Keras distributed mode
    # uses an estimator, so this branch is also needed.
    model_name = oss_model_path.split("/")[-1]
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/%s" % (oss_model_path, model_name))
    else:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))

    # TODO(lhw): use OSS to store the result image.
    _explain(datasource=datasource,
             estimator_string=estimator,
             select=select,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_metas=feature_metas,
             label_meta=label_meta,
             model_params=model_params,
             save="model_save",
             result_table=result_table,
             pai_table=data_table,
             oss_dest=None,
             oss_ak=None,
             oss_sk=None,
             oss_endpoint=None,
             oss_bucket_name=None)
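# Compatibility note on the eager-execution guard used by the wrappers
# above: tf.enable_eager_execution() only exists in TensorFlow 1.x. Under
# TensorFlow 2.x eager mode is the default and the symbol is gone, so the
# call raises AttributeError and the wrappers deliberately swallow or log
# the failure. A minimal self-contained sketch of the same guard:
import sys

import tensorflow as tf

try:
    tf.enable_eager_execution()
except Exception as e:  # AttributeError under TF 2.x
    sys.stderr.write("warning: failed to enable_eager_execution: %s\n" % e)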