def keras_train_distributed(classifier,
                            model_params,
                            save,
                            model_meta,
                            FLAGS,
                            train_dataset_fn,
                            val_dataset_fn,
                            is_pai=True):
    # train keras model distributed
    cluster, task_type, task_index = make_distributed_info_without_evaluator(
        FLAGS)
    dump_into_tf_config(cluster, task_type, task_index)
    dist_strategy = tf.contrib.distribute.ParameterServerStrategy()
    run_config = tf.estimator.RunConfig(tf_random_seed=get_tf_random_seed(),
                                        save_checkpoints_steps=100,
                                        train_distribute=dist_strategy,
                                        session_config=tf.ConfigProto(
                                            log_device_placement=True,
                                            device_filters=None))
    model_dir = FLAGS.checkpointDir
    keras_estimator = tf.keras.estimator.model_to_estimator(
        classifier, model_dir=model_dir, config=run_config)
    estimator_train_compiled(
        keras_estimator,
        train_dataset_fn,
        val_dataset_fn,
        # TODO(typhoonzero): do pass train settings.
        100,
        None,
        60,
        120)
    # FIXME(typhoonzero): predict keras distributed model should
    # also call model_to_estimator.

    # export saved model for prediction
    if "feature_columns" in model_params:
        all_feature_columns = model_params["feature_columns"]
    elif "linear_feature_columns" in model_params \
            and "dnn_feature_columns" in model_params:
        import copy
        all_feature_columns = copy.copy(
            model_params["linear_feature_columns"])
        all_feature_columns.extend(model_params["dnn_feature_columns"])
    else:
        raise Exception("No expected feature columns in model params")
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(  # noqa: E501
        tf.feature_column.make_parse_example_spec(all_feature_columns))
    export_path = keras_estimator.export_saved_model(save, serving_input_fn)

    # write the path under current directory
    export_path_str = str(export_path.decode("utf-8"))
    with open("exported_path", "w") as fn:
        fn.write(export_path_str)
    # write model metadata to model_meta.json
    save_metadata("model_meta.json", model_meta)
    print("Done training, model exported to: %s" % export_path_str)
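
# A minimal sketch of the convention the two helpers above rely on:
# TensorFlow's distributed Estimator reads cluster membership from the
# TF_CONFIG environment variable as a JSON document. The function below is
# hypothetical and only illustrates what dump_into_tf_config is assumed to
# write; the real implementation lives elsewhere in this package.
def _example_dump_into_tf_config(cluster, task_type, task_index):
    import json
    import os
    os.environ["TF_CONFIG"] = json.dumps({
        # e.g. {"ps": ["host0:2222"], "worker": ["host1:2222"]}
        "cluster": cluster,
        "task": {
            "type": task_type,  # "chief", "worker" or "ps"
            "index": task_index
        }
    })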
def keras_train_compiled(classifier, save, train_dataset, validate_dataset,
                         label_meta, epochs, verbose, model_meta,
                         validation_steps, has_none_optimizer):
    if hasattr(classifier, 'sqlflow_train_loop'):
        classifier.sqlflow_train_loop(train_dataset)
    else:
        if label_meta["feature_name"] != "":
            # FIXME(typhoonzero): this is why we need to set validation_steps:
            # https://github.com/tensorflow/tensorflow/issues/29743#issuecomment-502028891
            # remove this argument when PAI fixes this.
            if tf_is_version2():
                validation_steps = None
            else:
                if validate_dataset is None:
                    validation_steps = None
            history = classifier.fit(train_dataset,
                                     validation_steps=validation_steps,
                                     epochs=epochs if epochs else
                                     classifier.default_training_epochs(),
                                     validation_data=validate_dataset,
                                     verbose=verbose)
        else:
            history = classifier.fit(train_dataset,
                                     validation_steps=validation_steps,
                                     epochs=epochs if epochs else
                                     classifier.default_training_epochs(),
                                     verbose=verbose)
        train_metrics = dict()
        val_metrics = dict()
        for k in history.history.keys():
            if k.startswith("val_"):
                val_metrics[k] = float(history.history[k][-1])
            else:
                train_metrics[k] = float(history.history[k][-1])
        print("====== Result for training set: ======")
        for k, v in train_metrics.items():
            print("%s: %s" % (k, v))
        print("====== Result for validation set: ======")
        for k, v in val_metrics.items():
            print("%s: %s" % (k, v))
        model_meta["evaluation"] = val_metrics

    # write model metadata to model_meta.json
    save_metadata("model_meta.json", model_meta)

    try:
        # NOTE: classifier.save may fail if the model has
        # sqlflow_train_loop and does not have Keras layers defined.
        # So save metadata before calling classifier.save.
        classifier.save(save, save_format="tf")
    except:  # noqa: E722
        if has_none_optimizer:
            warnings.warn("Saving model with None optimizer fails")
        else:
            six.reraise(*sys.exc_info())
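
# A hedged sketch of the custom-training hook checked via hasattr above: a
# model class that defines sqlflow_train_loop takes over training itself and
# is handed the raw tf.data dataset instead of going through model.fit. The
# class and loop body below are illustrative only; the actual signature is
# whatever the model author defines.
class _ExampleCustomLoopModel(tf.keras.Model):
    def sqlflow_train_loop(self, train_dataset):
        # iterate over batches manually, e.g. to run a non-gradient or
        # multi-stage training procedure
        for batch in train_dataset:
            pass  # apply one custom training step per batch here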
def estimator_save(classifier, save, model_params, model_meta):
    # export saved model for prediction
    if "feature_columns" in model_params:
        all_feature_columns = model_params["feature_columns"]
    elif "linear_feature_columns" in model_params \
            and "dnn_feature_columns" in model_params:
        import copy
        all_feature_columns = copy.copy(
            model_params["linear_feature_columns"])
        all_feature_columns.extend(model_params["dnn_feature_columns"])
    else:
        raise Exception("No expected feature columns in model params")
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(  # noqa: E501
        tf.feature_column.make_parse_example_spec(all_feature_columns))
    export_path = classifier.export_saved_model(save, serving_input_fn)

    # write the path under current directory
    export_path_str = str(export_path.decode("utf-8"))
    with open("exported_path", "w") as fn:
        fn.write(export_path_str)
    # write model metadata to model_meta.json
    save_metadata("model_meta.json", model_meta)
    print("Done training, model exported to: %s" % export_path_str)
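
# A hedged sketch (the feature name is made up) of the request payload the
# export above accepts: build_parsing_serving_input_receiver_fn makes the
# SavedModel take serialized tf.Example protos, one string per input row.
def _example_serialized_request_row():
    example = tf.train.Example(features=tf.train.Features(
        feature={
            "sepal_length":
            tf.train.Feature(float_list=tf.train.FloatList(value=[5.1])),
        }))
    return example.SerializeToString()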
def train(datasource,
          select,
          model_params,
          train_params,
          feature_metas,
          feature_column_names,
          label_meta,
          validation_select,
          disk_cache=False,
          batch_size=None,
          epoch=1,
          load_pretrained_model=False,
          is_pai=False,
          pai_train_table="",
          pai_validate_table="",
          rank=0,
          nworkers=1,
          oss_model_dir="",
          transform_fn=None,
          feature_column_code="",
          model_repo_image="",
          original_sql=""):
    if batch_size == -1:
        batch_size = None
    print("Start training XGBoost model...")
    dtrain = xgb_dataset(datasource,
                         'train.txt',
                         select,
                         feature_metas,
                         feature_column_names,
                         label_meta,
                         is_pai,
                         pai_train_table,
                         cache=disk_cache,
                         batch_size=batch_size,
                         epoch=epoch,
                         rank=rank,
                         nworkers=nworkers,
                         transform_fn=transform_fn,
                         feature_column_code=feature_column_code)
    if len(validation_select.strip()) > 0:
        dvalidate = list(
            xgb_dataset(datasource,
                        'validate.txt',
                        validation_select,
                        feature_metas,
                        feature_column_names,
                        label_meta,
                        is_pai,
                        pai_validate_table,
                        rank=rank,
                        nworkers=nworkers,
                        transform_fn=transform_fn,
                        feature_column_code=feature_column_code))[0]

    # load a previously saved booster to continue training from it
    filename = "my_model"
    if load_pretrained_model:
        bst = xgb.Booster()
        bst.load_model(filename)
    else:
        bst = None

    # train incrementally over the stream of per-batch DMatrix objects,
    # passing the booster from the previous batch through xgb_model
    re = None
    for per_batch_dmatrix in dtrain:
        watchlist = [(per_batch_dmatrix, "train")]
        if len(validation_select.strip()) > 0:
            watchlist.append((dvalidate, "validate"))

        re = dict()
        bst = xgb.train(model_params,
                        per_batch_dmatrix,
                        evals=watchlist,
                        evals_result=re,
                        xgb_model=bst,
                        **train_params)
        print("Evaluation result: %s" % re)

    # only the rank-0 worker saves the model and its metadata
    if rank == 0:
        # TODO(sneaxiy): collect features and label
        metadata = collect_metadata(original_sql=original_sql,
                                    select=select,
                                    validation_select=validation_select,
                                    model_repo_image=model_repo_image,
                                    class_name=model_params.get("booster"),
                                    attributes=model_params,
                                    features=None,
                                    label=None,
                                    evaluation=re)
        save_model_to_local_file(bst, model_params, filename)
        save_metadata("model_meta.json", metadata)
        if is_pai and len(oss_model_dir) > 0:
            save_model(oss_model_dir, filename, model_params, train_params,
                       feature_metas, feature_column_names, label_meta,
                       feature_column_code)
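
# A minimal, self-contained sketch (synthetic data, hypothetical parameters)
# of the incremental pattern used by train() above: each xgb.train call
# passes the previous Booster through xgb_model, so trees learned on earlier
# batches are kept and new boosting rounds are appended.
def _example_incremental_xgb_train():
    import numpy as np
    import xgboost as xgb
    bst = None
    for _ in range(3):  # stand-in for three streamed data batches
        features = np.random.rand(100, 4)
        label = (features[:, 0] > 0.5).astype(int)
        dmatrix = xgb.DMatrix(features, label=label)
        bst = xgb.train({"objective": "binary:logistic"},
                        dmatrix,
                        num_boost_round=10,
                        xgb_model=bst)
    return bst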