def evaluate(datasource,
             estimator_string,
             select,
             result_table,
             feature_columns,
             feature_column_names,
             feature_metas={},
             label_meta={},
             model_params={},
             validation_metrics=["Accuracy"],
             save="",
             batch_size=1,
             validation_steps=None,
             verbose=0,
             pai_table=""):
    FLAGS = define_tf_flags()
    set_oss_environs(FLAGS)

    estimator_cls = import_model(estimator_string)
    is_estimator = is_tf_estimator(estimator_cls)
    set_log_level(verbose, is_estimator)

    is_pai = True if pai_table else False
    eval_dataset = get_dataset_fn(select,
                                  datasource,
                                  feature_column_names,
                                  feature_metas,
                                  label_meta,
                                  is_pai=is_pai,
                                  pai_table=pai_table,
                                  batch_size=batch_size)

    model_params.update(feature_columns)
    pop_optimizer_and_loss(model_params)

    if is_estimator:
        with open("exported_path", "r") as fid:
            exported_path = str(fid.read())
        model_params["warm_start_from"] = exported_path
        estimator = estimator_cls(**model_params)
        result_metrics = estimator_evaluate(estimator, eval_dataset,
                                            validation_metrics)
    else:
        keras_model = init_model_with_feature_column(estimator_cls,
                                                     model_params)
        keras_model_pkg = sys.modules[estimator_cls.__module__]
        result_metrics = keras_evaluate(keras_model, eval_dataset, save,
                                        keras_model_pkg, validation_metrics)

    if result_table:
        metric_name_list = ["loss"] + validation_metrics
        if is_pai:
            conn = PaiIOConnection.from_table(result_table)
        else:
            conn = db.connect_with_data_source(datasource)
        write_result_metrics(result_metrics, metric_name_list, result_table,
                             conn)
        conn.close()
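
# Hedged example call (illustrative values only; the datasource, tables and
# estimator name below are hypothetical, not taken from the original code):
#
#     evaluate(datasource="mysql://user:pass@host:3306/iris",
#              estimator_string="DNNClassifier",
#              select="SELECT * FROM iris.test",
#              result_table="iris.evaluate_result",
#              feature_columns=feature_columns,          # from the compiler
#              feature_column_names=feature_column_names,
#              feature_metas=feature_metas,
#              label_meta=label_meta,
#              validation_metrics=["Accuracy"],
#              batch_size=32,
#              pai_table="")  # empty pai_table: read through the datasource
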
def init_pai_local_tf_flags_and_envs(oss_model_dir):
    FLAGS = define_tf_flags()
    FLAGS.sqlflow_oss_ak = os.getenv("SQLFLOW_OSS_AK")
    FLAGS.sqlflow_oss_sk = os.getenv("SQLFLOW_OSS_SK")
    FLAGS.sqlflow_oss_ep = os.getenv("SQLFLOW_OSS_MODEL_ENDPOINT")
    if not oss_model_dir.startswith("oss://"):
        oss_model_dir = pai_model.get_oss_model_url(oss_model_dir)
    FLAGS.sqlflow_oss_modeldir = oss_model_dir
    FLAGS.checkpointDir = os.getcwd()
    set_oss_environs(FLAGS)
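
# Hedged usage sketch (the key values and model path are hypothetical): in
# PAI "local" mode the OSS credentials come from environment variables and a
# bare model directory is resolved to an oss:// URL:
#
#     os.environ["SQLFLOW_OSS_AK"] = "my-access-key-id"
#     os.environ["SQLFLOW_OSS_SK"] = "my-access-key-secret"
#     os.environ["SQLFLOW_OSS_MODEL_ENDPOINT"] = "oss-cn-hangzhou.aliyuncs.com"
#     init_pai_local_tf_flags_and_envs("my_project.my_model")
#     # a path without the "oss://" prefix goes through
#     # pai_model.get_oss_model_url() before being stored in FLAGS.
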
def call_fun(func, params):
    # NOTE: the head of this helper is cut off in this extract; it is
    # reconstructed here under the assumption that it selects, from params,
    # the entries whose keys match func's declared argument names
    # (requires a module-level ``import inspect``).
    dict_args = {}
    for name in inspect.getfullargspec(func).args:
        if name in params:
            dict_args[name] = params[name]
    return func(**dict_args)


def entrypoint():
    with open("train_params.pkl", "rb") as file:
        params = pickle.load(file)
    if params["entry_type"] == "train_tf":
        call_fun(train_tf, params)
    elif params["entry_type"] == "train_xgb":
        call_fun(train_xgb, params)
    elif params["entry_type"] == "predict_tf":
        call_fun(predict_tf, params)
    elif params["entry_type"] == "predict_xgb":
        call_fun(predict_xgb, params)
    elif params["entry_type"] == "explain_tf":
        call_fun(explain_tf, params)
    elif params["entry_type"] == "explain_xgb":
        call_fun(explain_xgb, params)
    elif params["entry_type"] == "evaluate_tf":
        call_fun(evaluate_tf, params)
    elif params["entry_type"] == "evaluate_xgb":
        call_fun(evaluate_xgb, params)


if __name__ == "__main__":
    FLAGS = define_tf_flags()
    set_oss_environs(FLAGS)
    entrypoint()
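
# Hedged example (the dict contents are hypothetical): the entry point above
# expects a pickled dict named "train_params.pkl" in the working directory,
# holding an "entry_type" key plus keyword arguments that call_fun forwards
# to the selected function. A submitter-side sketch:
#
#     import pickle
#
#     params = {
#         "entry_type": "evaluate_tf",
#         "datasource": "mysql://user:pass@host:3306/iris",
#         "select": "SELECT * FROM iris.test",
#         "validation_metrics": ["Accuracy"],
#     }
#     with open("train_params.pkl", "wb") as f:
#         pickle.dump(params, f)
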
def train(datasource,
          estimator_string,
          select,
          validation_select,
          feature_columns,
          feature_column_names,
          feature_metas={},
          label_meta={},
          model_params={},
          validation_metrics=["Accuracy"],
          save="",
          batch_size=1,
          epoch=1,
          validation_steps=1,
          verbose=0,
          max_steps=None,
          validation_start_delay_secs=0,
          validation_throttle_secs=0,
          save_checkpoints_steps=100,
          log_every_n_iter=10,
          load_pretrained_model=False,
          is_pai=True,
          pai_table="",
          pai_val_table="",
          feature_columns_code="",
          model_params_code_map={},
          model_repo_image="",
          original_sql="",
          feature_column_names_map=None):
    # TODO(sneaxiy): collect features and label
    model_meta = collect_metadata(original_sql=original_sql,
                                  select=select,
                                  validation_select=validation_select,
                                  model_repo_image=model_repo_image,
                                  class_name=estimator_string,
                                  attributes=model_params,
                                  features=None,
                                  label=None)

    estimator = import_model(estimator_string)
    is_estimator = is_tf_estimator(estimator)
    # always use verbose == 1 when using PAI to get more logs
    if verbose < 1:
        verbose = 1
    set_log_level(verbose, is_estimator)
    model_params.update(feature_columns)

    FLAGS = define_tf_flags()
    set_oss_environs(FLAGS)
    num_workers = len(FLAGS.worker_hosts.split(","))
    worker_id = FLAGS.task_index

    train_dataset_fn = get_dataset_fn(select,
                                      datasource,
                                      feature_column_names,
                                      feature_metas,
                                      label_meta,
                                      is_pai,
                                      pai_table,
                                      batch_size,
                                      epochs=epoch,
                                      shuffle_size=1000,
                                      num_workers=num_workers,
                                      worker_id=worker_id)
    val_dataset_fn = None
    if validation_select:
        val_dataset_fn = get_dataset_fn(validation_select, datasource,
                                        feature_column_names, feature_metas,
                                        label_meta, is_pai, pai_val_table,
                                        batch_size)

    if not is_estimator:
        if isinstance(estimator, types.FunctionType):
            # functional models need the field_metas parameter
            model_params["field_metas"] = feature_metas
        keras_train_and_save(estimator, model_params, save, FLAGS,
                             train_dataset_fn, val_dataset_fn, label_meta,
                             epoch, verbose, validation_metrics,
                             validation_steps, load_pretrained_model,
                             model_meta)
    else:
        estimator_train_and_save(estimator, model_params, save, FLAGS,
                                 train_dataset_fn, val_dataset_fn,
                                 log_every_n_iter, max_steps,
                                 validation_start_delay_secs,
                                 validation_throttle_secs,
                                 save_checkpoints_steps, validation_metrics,
                                 load_pretrained_model, model_meta)

    # save model to OSS
    if num_workers == 1 or worker_id == 0:
        oss_model_dir = FLAGS.sqlflow_oss_modeldir
        oss.save_oss_model(oss_model_dir, estimator_string, is_estimator,
                           feature_column_names, feature_column_names_map,
                           feature_metas, label_meta, model_params_code_map,
                           feature_columns_code, num_workers)
        print("Model saved to oss: %s" % oss_model_dir)
    print("Done training")
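
# Hedged note (assumption about the PAI runtime flags, which are not shown in
# this extract): FLAGS.worker_hosts and FLAGS.task_index are populated by the
# TF flags registered via define_tf_flags(); a two-worker job might, for
# example, be launched with
#
#     --worker_hosts=worker-0:2222,worker-1:2222 --task_index=0
#
# giving num_workers == 2 and worker_id == 0, so the
# "num_workers == 1 or worker_id == 0" guard above makes only worker 0
# upload the trained model to OSS.
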
def train_step(original_sql,
               model_image,
               estimator_string,
               datasource,
               select,
               validation_select,
               model_params,
               train_params,
               validation_params,
               feature_column_map,
               label_column,
               save,
               load=None,
               pai_table=None,
               pai_val_table=None):
    if model_params is None:
        model_params = {}

    if train_params is None:
        train_params = {}

    if validation_params is None:
        validation_params = {}

    if load:
        Model.load_from_db(datasource, load)
        load = "model_save"
    else:
        load = None

    is_pai = True if pai_table else False

    fc_map = compile_ir_feature_columns(feature_column_map,
                                        EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(feature_column_map)
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = dict([(fd.name, fd.to_dict(dtype_to_string=True))
                          for fd in field_descs])

    # no label for clustering models
    label_meta = None
    if label_column:
        label_meta = label_column.get_field_desc()[0].to_dict(
            dtype_to_string=True)

    feature_column_names_map = dict()
    for target in feature_column_map:
        fclist = feature_column_map[target]
        feature_column_names_map[target] = [
            fc.get_field_desc()[0].name for fc in fclist
        ]

    # Construct optimizer objects to pass to the model initializer.
    # The original model_params stays serializable (it holds no tf.xxx objects).
    model_params_constructed = copy.deepcopy(model_params)
    for optimizer_arg in ["optimizer", "dnn_optimizer", "linear_optimizer"]:
        if optimizer_arg in model_params_constructed:
            model_params_constructed[optimizer_arg] = get_tf_optimizer(
                model_params_constructed[optimizer_arg])

    if "loss" in model_params_constructed:
        model_params_constructed["loss"] = get_tf_loss(
            model_params_constructed["loss"])

    # extract params for training.
    verbose = train_params.get("verbose", 1)
    batch_size = train_params.get("batch_size", 1)
    epoch = train_params.get("epoch", 1)
    save_checkpoints_steps = train_params.get("save_checkpoints_steps", 100)
    max_steps = train_params.get("max_steps", None)
    if max_steps is not None and max_steps <= 0:
        max_steps = None

    validation_metrics = validation_params.get("metrics", "Accuracy")
    validation_metrics = [v.strip() for v in validation_metrics.split(",")]
    validation_steps = validation_params.get("steps", 1)
    validation_start_delay_secs = validation_params.get("start_delay_secs", 0)
    validation_throttle_secs = validation_params.get("throttle_secs", 0)

    estimator = import_model(estimator_string)
    is_estimator = is_tf_estimator(estimator)

    # always use verbose == 1 when using PAI to get more logs
    if verbose < 1:
        verbose = 1
    set_log_level(verbose, is_estimator)

    model_params_constructed.update(fc_map)

    FLAGS = define_tf_flags()
    set_oss_environs(FLAGS)
    num_workers = len(FLAGS.worker_hosts.split(","))
    worker_id = FLAGS.task_index

    train_dataset_fn = get_dataset_fn(select,
                                      datasource,
                                      feature_column_names,
                                      feature_metas,
                                      label_meta,
                                      is_pai,
                                      pai_table,
                                      batch_size,
                                      epochs=epoch,
                                      shuffle_size=1000,
                                      num_workers=num_workers,
                                      worker_id=worker_id)
    val_dataset_fn = None
    if validation_select or pai_val_table:
        val_dataset_fn = get_dataset_fn(validation_select, datasource,
                                        feature_column_names, feature_metas,
                                        label_meta, is_pai, pai_val_table,
                                        batch_size)

    model_meta = collect_metadata(original_sql=original_sql,
                                  select=select,
                                  validation_select=validation_select,
                                  model_repo_image=model_image,
                                  class_name=estimator_string,
                                  attributes=model_params,
                                  features=feature_column_map,
                                  label=label_column)

    # FIXME(typhoonzero): avoid saving model_meta twice; keras_train_and_save
    # and estimator_train_and_save also dump model_meta to a file under cwd.
    # We should only keep the model.save_to_db part.
    save_dir = "model_save"
    if not is_estimator:
        if isinstance(estimator, types.FunctionType):
            # functional models need the field_metas parameter
            model_params_constructed["field_metas"] = feature_metas
        keras_train_and_save(estimator, model_params_constructed, save_dir,
                             FLAGS, train_dataset_fn, val_dataset_fn,
                             label_meta, epoch, verbose, validation_metrics,
                             validation_steps, load, model_meta, is_pai)
    else:
        estimator_train_and_save(estimator, model_params_constructed,
                                 save_dir, FLAGS, train_dataset_fn,
                                 val_dataset_fn, max_steps,
                                 validation_start_delay_secs,
                                 validation_throttle_secs,
                                 save_checkpoints_steps, validation_metrics,
                                 load, model_meta)

    # save model to DB/OSS
    model = Model(EstimatorType.TENSORFLOW, model_meta)
    if num_workers == 1 or worker_id == 0:
        saved = model.save_to_db(datasource,
                                 save,
                                 oss_model_dir=FLAGS.sqlflow_oss_modeldir)
        print("Model saved to DB: %s" % saved)
    print("Done training")
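
# Hedged example (illustrative values only): the dictionaries that train_step
# reads with .get() above could be assembled by the caller as, e.g.:
#
#     train_params = {"verbose": 1, "batch_size": 32, "epoch": 10,
#                     "save_checkpoints_steps": 100, "max_steps": None}
#     validation_params = {"metrics": "Accuracy,AUC", "steps": 1,
#                          "start_delay_secs": 0, "throttle_secs": 0}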