def estimator_train_and_save(estimator, model_params, save, train_dataset_fn,
                             val_dataset_fn, log_every_n_iter, train_max_steps,
                             eval_start_delay_secs, eval_throttle_secs,
                             save_checkpoints_steps, metric_names,
                             load_pretrained_model, model_meta):
    print("Start training using estimator model...")
    model_params["model_dir"] = save

    warm_start_from = save if load_pretrained_model else None
    if warm_start_from:
        load_pretrained_model_estimator(estimator, model_params)
    classifier = init_model(estimator, model_params)

    # Do not add the default Accuracy metric when training with an estimator:
    # it fails when the estimator is a regressor, and estimators appear to add
    # some metrics automatically. Only add extra metrics when the user
    # specifies them with `WITH`.
    if tf_is_version2() and metric_names != ["Accuracy"]:
        classifier = tf.estimator.add_metrics(classifier,
                                              get_tf_metrics(metric_names))

    estimator_train_compiled(classifier, train_dataset_fn, val_dataset_fn,
                             log_every_n_iter, train_max_steps,
                             eval_start_delay_secs, eval_throttle_secs)
    estimator_save(classifier, save, model_params, model_meta)
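
# For context, a minimal sketch of the metric_fn shape that
# tf.estimator.add_metrics expects and that get_tf_metrics(metric_names) is
# assumed to produce. The AUC metric and the "logistic" prediction key are
# hypothetical, following the pattern in the TensorFlow documentation:
def _demo_metric_fn(labels, predictions):
    import tensorflow as tf
    auc = tf.keras.metrics.AUC(name="auc")
    auc.update_state(y_true=labels, y_pred=predictions["logistic"])
    return {"auc": auc}
# usage: classifier = tf.estimator.add_metrics(classifier, _demo_metric_fn)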
def set_log_level(verbose, is_estimator):
    assert 0 <= verbose <= 3
    if (not is_estimator and verbose == 1) or tf_is_version2():
        # Python logging levels run from DEBUG=10 to ERROR=40, so a higher
        # verbose maps to a lower (chattier) log level.
        tf.get_logger().setLevel((4 - verbose) * 10)
    elif verbose >= 2:
        tf.logging.set_verbosity(tf.logging.INFO)
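
# A minimal, self-contained sketch (not part of the original module) showing
# the (4 - verbose) * 10 mapping above against Python's standard level names:
#   verbose=0 -> ERROR, 1 -> WARNING, 2 -> INFO, 3 -> DEBUG.
def _demo_verbose_to_log_level():
    import logging
    for verbose in range(4):
        level = (4 - verbose) * 10
        print("verbose=%d -> %s" % (verbose, logging.getLevelName(level)))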
def keras_train_compiled(classifier, save, train_dataset, validate_dataset,
                         label_meta, epochs, verbose, model_meta,
                         validation_steps, has_none_optimizer):
    if hasattr(classifier, 'sqlflow_train_loop'):
        classifier.sqlflow_train_loop(train_dataset)
    else:
        if label_meta["feature_name"] != "":
            # FIXME(typhoonzero): this is why we need to set validation_steps:
            # https://github.com/tensorflow/tensorflow/issues/29743#issuecomment-502028891
            # remove this argument when PAI fixes this.
            if tf_is_version2():
                validation_steps = None
            else:
                if validate_dataset is None:
                    validation_steps = None
            history = classifier.fit(train_dataset,
                                     validation_steps=validation_steps,
                                     epochs=epochs if epochs else
                                     classifier.default_training_epochs(),
                                     validation_data=validate_dataset,
                                     verbose=verbose)
        else:
            history = classifier.fit(train_dataset,
                                     validation_steps=validation_steps,
                                     epochs=epochs if epochs else
                                     classifier.default_training_epochs(),
                                     verbose=verbose)
        train_metrics = dict()
        val_metrics = dict()
        for k in history.history.keys():
            if k.startswith("val_"):
                val_metrics[k] = float(history.history[k][-1])
            else:
                train_metrics[k] = float(history.history[k][-1])
        print("====== Result for training set: ======")
        for k, v in train_metrics.items():
            print("%s: %s" % (k, v))
        print("====== Result for validation set: ======")
        for k, v in val_metrics.items():
            print("%s: %s" % (k, v))
        model_meta["evaluation"] = val_metrics

    # write model metadata to model_meta.json
    save_metadata("model_meta.json", model_meta)

    try:
        # NOTE: classifier.save may fail if the model has
        # sqlflow_train_loop and does not have Keras layers defined.
        # So save metadata before calling classifier.save.
        classifier.save(save, save_format="tf")
    except:  # noqa: E722
        if has_none_optimizer:
            warnings.warn("Saving model with None optimizer fails")
        else:
            six.reraise(*sys.exc_info())
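
# A minimal sketch of the history.history splitting performed above; the
# dict values are made up for illustration (Keras stores one entry per epoch,
# and the code keeps the last epoch's value of each metric):
def _demo_split_history():
    history = {"loss": [0.9, 0.5], "acc": [0.6, 0.8],
               "val_loss": [1.0, 0.6], "val_acc": [0.5, 0.7]}
    train_metrics = {k: float(v[-1]) for k, v in history.items()
                     if not k.startswith("val_")}
    val_metrics = {k: float(v[-1]) for k, v in history.items()
                   if k.startswith("val_")}
    return train_metrics, val_metrics  # ({'loss': 0.5, ...}, {'val_loss': 0.6, ...})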
def estimator_train_and_save_legacy(estimator, model_params, save, FLAGS,
                                    train_dataset_fn, val_dataset_fn,
                                    train_max_steps, eval_start_delay_secs,
                                    eval_throttle_secs, save_checkpoints_steps,
                                    metric_names, load_pretrained_model,
                                    model_meta):
    print("Start training using estimator model...")
    is_distributed = False
    if len(FLAGS.worker_hosts.split(",")) > 1:
        is_distributed = True
    model_params["config"] = make_estimator_distributed_runconfig(
        FLAGS,
        estimator,
        is_distributed,
        save_checkpoints_steps=save_checkpoints_steps)
    ckpt_dir = FLAGS.checkpointDir if FLAGS.checkpointDir else save
    print("Using checkpoint path: %s" % ckpt_dir)
    model_params["model_dir"] = ckpt_dir
    model_params["config"] = tf.estimator.RunConfig(
        tf_random_seed=get_tf_random_seed(),
        save_checkpoints_steps=save_checkpoints_steps)

    warm_start_from = save if load_pretrained_model else None
    if warm_start_from:
        load_pretrained_model_estimator(estimator, model_params)
    classifier = init_model(estimator, model_params)

    # Do not add the default Accuracy metric when training with an estimator:
    # it fails when the estimator is a regressor, and estimators appear to add
    # some metrics automatically. Only add extra metrics when the user
    # specifies them with `WITH`.
    if tf_is_version2() and metric_names != ["Accuracy"]:
        classifier = tf.estimator.add_metrics(classifier,
                                              get_tf_metrics(metric_names))

    estimator_train_compiled(classifier, train_dataset_fn, val_dataset_fn,
                             train_max_steps, eval_start_delay_secs,
                             eval_throttle_secs)

    if FLAGS.task_index != 0:
        print("skip exporting model on worker != 0")
        return
    estimator_save(classifier, save, model_params, model_meta)
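
# Sketch (assumed environment, not part of this module) of the standard
# TF_CONFIG that distributed estimator training reads;
# make_estimator_distributed_runconfig presumably derives an equivalent
# cluster spec from FLAGS.worker_hosts and FLAGS.task_index:
def _demo_tf_config(worker_hosts, task_index):
    import json
    import os
    hosts = worker_hosts.split(",")
    os.environ["TF_CONFIG"] = json.dumps({
        "cluster": {"chief": [hosts[0]], "worker": hosts[1:]},
        "task": {"type": "worker", "index": task_index},
    })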
    def _build(self, experiment, run_config):
        feature_columns = []
        for col_name in [
                "sepal_length", "sepal_width", "petal_length", "petal_width"
        ]:
            feature_columns.append(tf.feature_column.numeric_column(col_name))

        return tf.estimator.DNNClassifier(  # pylint: disable=no-member
            n_classes=3,
            hidden_units=[10, 20],
            config=run_config,
            feature_columns=feature_columns)


if __name__ == "__main__":
    if tf_is_version2():
        raise ValueError("ALPS must run with TensorFlow == 1.15.x")

    odps_project = os.getenv("SQLFLOW_TEST_DB_MAXCOMPUTE_PROJECT")
    odps_conf = OdpsConf(
        accessid=os.getenv("SQLFLOW_TEST_DB_MAXCOMPUTE_AK"),
        accesskey=os.getenv("SQLFLOW_TEST_DB_MAXCOMPUTE_SK"),
        # endpoint should look like:
        # "https://service.cn.maxcompute.aliyun.com/api"
        endpoint=os.getenv("SQLFLOW_TEST_DB_MAXCOMPUTE_ENDPOINT"),
        project=odps_project)

    features = []
    for col_name in [
            "sepal_length", "sepal_width", "petal_length", "petal_width"
    ]:
        # NOTE: add sparse columns like:
def estimator_predict(estimator, model_params, save, result_table,
                      feature_column_names, feature_column_names_map,
                      feature_columns, feature_metas, train_label_name,
                      result_col_name, driver, conn, predict_generator,
                      selected_cols, hdfs_namenode_addr, hive_location,
                      hdfs_user, hdfs_pass):
    write_cols = selected_cols[:]
    try:
        train_label_index = selected_cols.index(train_label_name)
    except ValueError:
        train_label_index = -1
    if train_label_index != -1:
        del write_cols[train_label_index]
    write_cols.append(result_col_name)

    # load from the exported model
    with open("exported_path", "r") as fn:
        export_path = fn.read()
    if tf_is_version2():
        imported = tf.saved_model.load(export_path)
    else:
        imported = tf.saved_model.load_v2(export_path)

    def add_to_example(example, x, i):
        feature_name = feature_column_names[i]
        dtype_str = feature_metas[feature_name]["dtype"]
        if feature_metas[feature_name]["delimiter"] != "":
            if feature_metas[feature_name]["is_sparse"]:
                # NOTE(typhoonzero): sparse features come in as
                # (indices, values, shape) tuples; use indices only.
                values = x[0][i][0].flatten()
            else:
                values = x[0][i].flatten()
            if dtype_str == "float32" or dtype_str == "float64":
                example.features.feature[feature_name].float_list.value.extend(
                    list(values))
            elif dtype_str == "int32" or dtype_str == "int64":
                example.features.feature[feature_name].int64_list.value.extend(
                    list(values))
        else:
            if "feature_columns" in feature_columns:
                idx = feature_column_names.index(feature_name)
                fc = feature_columns["feature_columns"][idx]
            else:
                # DNNLinearCombinedXXX models have dnn_feature_columns and
                # linear_feature_columns params.
                idx = -1
                try:
                    idx = feature_column_names_map[
                        "dnn_feature_columns"].index(feature_name)
                    fc = feature_columns["dnn_feature_columns"][idx]
                except:  # noqa: E722
                    try:
                        idx = feature_column_names_map[
                            "linear_feature_columns"].index(feature_name)
                        fc = feature_columns["linear_feature_columns"][idx]
                    except:  # noqa: E722
                        pass
                if idx == -1:
                    raise ValueError(
                        "can not find feature %s in all feature columns" %
                        feature_name)
            if dtype_str == "float32" or dtype_str == "float64":
                # need to pass a tuple(float, )
                example.features.feature[feature_name].float_list.value.extend(
                    (float(x[0][i][0]), ))
            elif dtype_str == "int32" or dtype_str == "int64":
                numeric_type = type(tf.feature_column.numeric_column("tmp"))
                if type(fc) == numeric_type:
                    example.features.feature[
                        feature_name].float_list.value.extend(
                            (float(x[0][i][0]), ))
                else:
                    example.features.feature[
                        feature_name].int64_list.value.extend(
                            (int(x[0][i][0]), ))
            elif dtype_str == "string":
                example.features.feature[feature_name].bytes_list.value.extend(
                    x[0][i])

    def predict(x):
        example = tf.train.Example()
        for i in range(len(feature_column_names)):
            add_to_example(example, x, i)
        return imported.signatures["predict"](
            examples=tf.constant([example.SerializeToString()]))

    with db.buffered_db_writer(driver, conn, result_table, write_cols, 100,
                               hdfs_namenode_addr, hive_location, hdfs_user,
                               hdfs_pass) as w:
        for row, _ in predict_generator():
            features = db.read_features_from_row(row, selected_cols,
                                                 feature_column_names,
                                                 feature_metas)
            result = predict((features, ))
            if train_label_index != -1 and len(row) > train_label_index:
                del row[train_label_index]
            if "class_ids" in result:
                row.append(str(result["class_ids"].numpy()[0][0]))
            else:
                # regression predictions
                row.append(str(result["predictions"].numpy()[0][0]))
            w.write(row)
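
# A minimal sketch of the tf.train.Example round trip that predict() performs
# above; the feature name and value are made up for illustration:
def _demo_example_round_trip():
    import tensorflow as tf
    example = tf.train.Example()
    example.features.feature["sepal_length"].float_list.value.extend([5.1])
    serialized = example.SerializeToString()
    # A SavedModel exported with a "predict" signature is then called as:
    # imported.signatures["predict"](examples=tf.constant([serialized]))
    return serialized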
def estimator_predict(result_table, feature_column_names, feature_metas,
                      train_label_name, result_col_name, conn,
                      predict_generator, selected_cols):
    write_cols = selected_cols[:]
    try:
        train_label_index = selected_cols.index(train_label_name)
    except ValueError:
        train_label_index = -1
    if train_label_index != -1:
        del write_cols[train_label_index]
    write_cols.append(result_col_name)

    # load from the exported model
    with open("exported_path", "r") as fn:
        export_path = fn.read()
    if tf_is_version2():
        imported = tf.saved_model.load(export_path)
    else:
        imported = tf.saved_model.load_v2(export_path)

    def add_to_example(example, x, i):
        feature_name = feature_column_names[i]
        dtype_str = feature_metas[feature_name]["dtype"]
        if feature_metas[feature_name]["delimiter"] != "":
            if feature_metas[feature_name]["delimiter_kv"] != "":
                keys = x[0][i][0].flatten()
                weights = x[0][i][1].flatten()
                weight_dtype_str = feature_metas[feature_name]["dtype_weight"]
                if (dtype_str == "float32" or dtype_str == "float64"
                        or dtype_str == DataType.FLOAT32):
                    raise ValueError(
                        "unsupported key-value feature with key type float")
                elif (dtype_str == "int32" or dtype_str == "int64"
                      or dtype_str == DataType.INT64):
                    example.features.feature[
                        feature_name].int64_list.value.extend(list(keys))
                elif dtype_str == "string" or dtype_str == DataType.STRING:
                    example.features.feature[
                        feature_name].bytes_list.value.extend(list(keys))
                if (weight_dtype_str == "float32"
                        or weight_dtype_str == "float64"
                        or weight_dtype_str == DataType.FLOAT32):
                    example.features.feature["_".join(
                        [feature_name,
                         "weight"])].float_list.value.extend(list(weights))
                else:
                    raise ValueError(
                        "unsupported key-value column weight data type: %s" %
                        weight_dtype_str)
            else:
                # NOTE(typhoonzero): sparse features come in as
                # (indices, values, shape) tuples; use indices only.
                values = x[0][i][0].flatten()
                if (dtype_str == "float32" or dtype_str == "float64"
                        or dtype_str == DataType.FLOAT32):
                    example.features.feature[
                        feature_name].float_list.value.extend(list(values))
                elif (dtype_str == "int32" or dtype_str == "int64"
                      or dtype_str == DataType.INT64):
                    example.features.feature[
                        feature_name].int64_list.value.extend(list(values))
        else:
            if (dtype_str == "float32" or dtype_str == "float64"
                    or dtype_str == DataType.FLOAT32):
                # need to pass a tuple(float, )
                example.features.feature[feature_name].float_list.value.extend(
                    (float(x[0][i][0]), ))
            elif (dtype_str == "int32" or dtype_str == "int64"
                  or dtype_str == DataType.INT64):
                example.features.feature[feature_name].int64_list.value.extend(
                    (int(x[0][i][0]), ))
            elif dtype_str == "string" or dtype_str == DataType.STRING:
                example.features.feature[feature_name].bytes_list.value.extend(
                    x[0][i])

    def predict(x):
        example = tf.train.Example()
        for i in range(len(feature_column_names)):
            add_to_example(example, x, i)
        return imported.signatures["predict"](
            examples=tf.constant([example.SerializeToString()]))

    with db.buffered_db_writer(conn, result_table, write_cols, 100) as w:
        for row, _ in predict_generator():
            features = db.read_features_from_row(row,
                                                 selected_cols,
                                                 feature_column_names,
                                                 feature_metas,
                                                 is_xgboost=False)
            result = predict((features, ))
            if train_label_index != -1 and len(row) > train_label_index:
                del row[train_label_index]
            if "class_ids" in result:
                row.append(str(result["class_ids"].numpy()[0][0]))
            else:
                # regression predictions
                row.append(str(result["predictions"].numpy()[0][0]))
            w.write(row)
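
# Sketch (made-up feature name and values) of how a parsed key-value feature
# such as "k1:0.2,k2:0.8" lands in the Example built above: keys go into the
# feature itself, weights into a companion "<feature>_weight" feature:
def _demo_kv_feature():
    import tensorflow as tf
    example = tf.train.Example()
    example.features.feature["tags"].bytes_list.value.extend([b"k1", b"k2"])
    example.features.feature["tags_weight"].float_list.value.extend([0.2, 0.8])
    return example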