def find_hyperparams(
    config_file: str,
    train_best_model: bool = typer.Argument(False),
):
    search_config = _load_config(config_file, "search")
    param_grid = search_config["grid"]
    n_jobs = search_config["jobs"]
    metric = _load_config(config_file, "metrics")[0]
    dummy_hyperparams = {name: {} for name in param_grid.keys()}
    estimator = model.build_estimator(dummy_hyperparams)
    scoring = metrics.get_scoring_function(metric["name"], **metric["params"])
    gs = GridSearchCV(
        estimator,
        _param_grid_to_sklearn_format(param_grid),
        n_jobs=n_jobs,
        scoring=scoring,
        verbose=3,
    )
    split = "train"
    X, y = _get_dataset(_load_config(config_file, "data"), splits=[split])[split]
    gs.fit(X, y)
    hyperparams = _param_grid_to_custom_format(gs.best_params_)
    estimator = model.build_estimator(hyperparams)
    output_dir = _load_config(config_file, "export")["output_dir"]
    _save_versioned_estimator(estimator, hyperparams, output_dir)
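# The two grid-format helpers above are referenced but not shown. A minimal
# sketch of what they might look like, assuming the custom format nests
# parameters per component name ({"clf": {"C": [1, 10]}}) and the sklearn
# format flattens them to dunder keys ({"clf__C": [1, 10]}); the shapes here
# are inferred from the dummy_hyperparams construction, not taken from the
# project:
def _param_grid_to_sklearn_format(param_grid):
    # {"clf": {"C": [1, 10]}} -> {"clf__C": [1, 10]}
    return {
        "{}__{}".format(component, param): values
        for component, params in param_grid.items()
        for param, values in params.items()
    }


def _param_grid_to_custom_format(best_params):
    # {"clf__C": 10} -> {"clf": {"C": 10}}
    hyperparams = {}
    for key, value in best_params.items():
        component, param = key.split("__", 1)
        hyperparams.setdefault(component, {})[param] = value
    return hyperparams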
def _experiment_fn(run_config, hparams):
    # num_epochs can control duration if train_steps isn't
    # passed to Experiment
    train_input = lambda: model.generate_input_fn(
        hparams.train_files,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size,
    )
    # Don't shuffle evaluation data
    eval_input = lambda: model.generate_input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size,
        shuffle=False
    )
    return tf.contrib.learn.Experiment(
        model.build_estimator(
            embedding_size=hparams.embedding_size,
            # Construct layer sizes with exponential decay
            hidden_units=[
                max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
                for i in range(hparams.num_layers)
            ],
            config=run_config
        ),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        **experiment_args
    )
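# A sketch of how an experiment function like the one above is typically
# consumed via learn_runner in TF 1.x; the hparams values are illustrative
# placeholders, and experiment_args is assumed to be defined in the
# enclosing scope as in the original:
import tensorflow as tf
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.training import HParams

learn_runner.run(
    _experiment_fn,
    run_config=tf.contrib.learn.RunConfig(model_dir='output'),
    hparams=HParams(
        train_files=['train.csv'], eval_files=['eval.csv'],
        num_epochs=None, train_batch_size=40, eval_batch_size=40,
        embedding_size=8, first_layer_size=100, scale_factor=0.7,
        num_layers=4))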
def _experiment_fn(output_dir):
    # num_epochs can control duration if train_steps isn't
    # passed to Experiment
    train_input = model.generate_input_fn(
        train_files,
        num_epochs=num_epochs,
        batch_size=train_batch_size,
    )
    # Don't shuffle evaluation data
    eval_input = model.generate_input_fn(
        eval_files,
        batch_size=eval_batch_size,
        shuffle=False)
    return tf.contrib.learn.Experiment(
        model.build_estimator(
            output_dir,
            # Construct layer sizes with exponential decay
            hidden_units=[
                max(2, int(first_layer_size * scale_factor**i))
                for i in range(num_layers)
            ]),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        # Export strategies control the prediction graph structure
        # of exported binaries.
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                model.serving_input_fn,
                default_output_alternative_key=None,
                exports_to_keep=1)
        ],
        **experiment_args)
def train(config_file: str):
    hyperparams = _load_config(config_file, "hyperparams")
    split = "train"
    X, y = _get_dataset(_load_config(config_file, "data"), splits=[split])[split]
    estimator = model.build_estimator(hyperparams)
    estimator.fit(X, y)
    output_dir = _load_config(config_file, "export")["output_dir"]
    version = _save_versioned_estimator(estimator, hyperparams, output_dir)
    return version
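# _save_versioned_estimator is referenced above but not shown. A minimal
# sketch of one plausible implementation, assuming joblib persistence and a
# timestamp-based version directory; the layout and filenames are
# assumptions, not the project's actual scheme:
import json
import os
from datetime import datetime

import joblib


def _save_versioned_estimator(estimator, hyperparams, output_dir):
    version = datetime.now().strftime("%Y%m%d%H%M%S")
    model_dir = os.path.join(output_dir, version)
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(estimator, os.path.join(model_dir, "model.joblib"))
    with open(os.path.join(model_dir, "params.json"), "w") as f:
        json.dump(hyperparams, f)
    return version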
def train_and_maybe_evaluate(hparams):
    """Run the training and evaluate using the high level API.

    Args:
      hparams: Holds hyperparameters used to train the model as name/value
        pairs.

    Returns:
      The estimator that was used for training (and maybe eval)
    """
    schema = bookings.read_schema(hparams.schema_file)
    tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)

    train_input = lambda: model.input_fn(
        hparams.train_files,
        tf_transform_output,
        batch_size=TRAIN_BATCH_SIZE
    )

    eval_input = lambda: model.input_fn(
        hparams.eval_files,
        tf_transform_output,
        batch_size=EVAL_BATCH_SIZE
    )

    train_spec = tf.estimator.TrainSpec(
        train_input, max_steps=hparams.train_steps)

    serving_receiver_fn = lambda: model.example_serving_receiver_fn(
        tf_transform_output, schema)

    exporter = tf.estimator.FinalExporter('bookings', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='bookings-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=999, keep_checkpoint_max=1)

    serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
    run_config = run_config.replace(model_dir=serving_model_dir)

    estimator = model.build_estimator(
        tf_transform_output,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
            for i in range(NUM_DNN_LAYERS)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    return estimator
def train_and_maybe_evaluate(train_files, eval_files, hparams):
    """Run the training and evaluate using the high level API.

    Args:
      train_files: Files to read training data from.
      eval_files: Files to read evaluation data from.
      hparams: Holds hyperparameters used to train the model as name/value
        pairs.

    Returns:
      The estimator that was used for training (and maybe eval)
    """
    schema = taxi.read_schema('schema.pbtxt')

    train_input = lambda: model.input_fn(
        train_files,
        hparams.tf_transform_dir,
        batch_size=TRAIN_BATCH_SIZE
    )

    eval_input = lambda: model.input_fn(
        eval_files,
        hparams.tf_transform_dir,
        batch_size=EVAL_BATCH_SIZE
    )

    train_spec = tf.estimator.TrainSpec(
        train_input, max_steps=hparams.train_steps)

    serving_receiver_fn = lambda: model.example_serving_receiver_fn(
        hparams.tf_transform_dir, schema)

    exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='chicago-taxi-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=999, keep_checkpoint_max=1)

    serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
    run_config = run_config.replace(model_dir=serving_model_dir)

    estimator = model.build_estimator(
        hparams.tf_transform_dir,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
            for i in range(NUM_DNN_LAYERS)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    return estimator
def _experiment_fn(run_config, hparams):
    # num_epochs can control duration if train_steps isn't
    # passed to Experiment
    train_input = lambda: model.generate_input_fn()
    # Don't shuffle evaluation data
    eval_input = lambda: model.generate_input_fn(shuffle=False)

    return tf.contrib.learn.Experiment(
        model.build_estimator(config=run_config),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        **experiment_args)
def _experiment_fn(output_dir):
    return tf.contrib.learn.Experiment(
        model.build_estimator(output_dir, n_classes, params, config),
        train_input_fn=model.make_input_fn_stroke(
            files_pattern=os.path.join(data_dir, 'train-*'),
            batch_size=train_batch_size),
        eval_input_fn=model.make_input_fn_stroke(
            files_pattern=os.path.join(data_dir, 'eval-*'),
            batch_size=eval_batch_size),
        export_strategies=[
            tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
                model.serving_input_fn, exports_to_keep=1)
        ],
        train_steps=train_steps,
        eval_steps=eval_steps,
        **experiment_args)
def train_and_evaluate(args):
    """Run the training and evaluate using the high level API"""
    train_input = model._make_training_input_fn(
        args.tft_working_dir,
        args.train_filebase,
        num_epochs=args.num_epochs,
        batch_size=args.train_batch_size,
        buffer_size=args.train_buffer_size,
        prefetch_buffer_size=args.train_prefetch_buffer_size)

    # Don't shuffle evaluation data.
    eval_input = model._make_training_input_fn(
        args.tft_working_dir,
        args.eval_filebase,
        shuffle=False,
        batch_size=args.eval_batch_size,
        buffer_size=1,
        prefetch_buffer_size=args.eval_prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(
        train_input, max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'tft_classifier', model._make_serving_input_fn(args.tft_working_dir))
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=args.eval_steps,
        exporters=[exporter],
        name='tft_classifier-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=args.job_dir)
    print('model dir {}'.format(run_config.model_dir))

    estimator = model.build_estimator(
        config=run_config,
        tft_working_dir=args.tft_working_dir,
        embedding_size=args.embedding_size,
        # Construct layers sizes with exponential decay.
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
    )

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = model._make_training_input_fn(
        hparams.tft_working_dir,
        hparams.train_filebase,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size,
        buffer_size=hparams.train_buffer_size,
        prefetch_buffer_size=hparams.train_prefetch_buffer_size)

    # Don't shuffle evaluation data
    eval_input = model._make_training_input_fn(
        hparams.tft_working_dir,
        hparams.eval_filebase,
        shuffle=False,
        batch_size=hparams.eval_batch_size,
        buffer_size=1,
        prefetch_buffer_size=hparams.eval_prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(
        train_input, max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'tft_classifier', model._make_serving_input_fn(hparams.tft_working_dir))
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='tft_classifier-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))

    estimator = model.build_estimator(
        config=run_config,
        tft_working_dir=hparams.tft_working_dir,
        embedding_size=hparams.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
    )

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def _experiment_fn(output_dir):
    return Experiment(
        model.build_estimator(output_dir),
        train_input_fn=model.get_input_fn(
            filename=os.path.join(data_dir, 'train.tfrecords'),
            batch_size=train_batch_size),
        eval_input_fn=model.get_input_fn(
            filename=os.path.join(data_dir, 'test.tfrecords'),
            batch_size=eval_batch_size),
        export_strategies=[saved_model_export_utils.make_export_strategy(
            model.serving_input_fn,
            default_output_alternative_key=None,
            exports_to_keep=1)],
        train_steps=train_steps,
        eval_steps=eval_steps,
        **experiment_args
    )
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = lambda: model.input_fn(
        hparams.train_files,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size
    )
    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size,
        shuffle=False
    )

    train_spec = tf.estimator.TrainSpec(
        train_input, max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'census', model.SERVING_FUNCTIONS[hparams.export_format])
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='census-eval'
    )

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))

    estimator = model.build_estimator(
        embedding_size=hparams.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
        config=run_config
    )

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
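# model.SERVING_FUNCTIONS above maps an export-format flag ('JSON', 'CSV',
# 'EXAMPLE') to a serving input receiver, as in the Cloud ML Engine census
# sample. A minimal runnable sketch with a single JSON receiver; the feature
# names and dtypes are placeholders, not the sample's real schema:
import tensorflow as tf


def json_serving_input_fn():
    # One placeholder per input feature; JSON requests feed these directly.
    inputs = {
        'age': tf.placeholder(tf.float32, [None]),
        'workclass': tf.placeholder(tf.string, [None]),
    }
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)


SERVING_FUNCTIONS = {'JSON': json_serving_input_fn}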
def _experiment_fn(output_dir):
    return tf.contrib.learn.Experiment(
        model.build_estimator(output_dir, n_classes, params, config),
        train_input_fn=model.make_input_fn(
            files_pattern=os.path.join(data_dir, 'train-*'),
            batch_size=train_batch_size),
        eval_input_fn=model.make_input_fn(
            files_pattern=os.path.join(data_dir, 'eval-*'),
            batch_size=eval_batch_size),
        export_strategies=[
            tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
                model.serving_input_fn, exports_to_keep=1)
        ],
        train_steps=train_steps,
        eval_steps=eval_steps,
        **experiment_args
    )
def _experiment_fn(output_dir):
    train_input = model.generate_input_fn(
        train_file,
        num_epochs=num_epochs,
        batch_size=train_batch_size)
    eval_input = model.generate_input_fn(
        eval_file, batch_size=eval_batch_size)
    return Experiment(
        model.build_estimator(
            job_dir,
            embedding_size=embedding_size,
            hidden_units=hidden_units),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                model.serving_input_fn,
                default_output_alternative_key=None,
                exports_to_keep=1)
        ],
        **experiment_args)
def _experiment_fn(run_config, hparams):
    """Experiment definition."""
    # Training input function
    train_input = lambda: model.generate_input_fn(
        hparams.train_files,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size,
    )
    # Evaluation input function
    eval_input = lambda: model.generate_input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size
    )
    # Experiment
    return tf.contrib.learn.Experiment(
        model.build_estimator(hparams.job_dir),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        **experiment_args
    )
def _experiment_fn(output_dir):
    input_fn = model.generate_csv_input_fn
    train_input = input_fn(
        train_data_paths,
        num_epochs=num_epochs,
        batch_size=train_batch_size)
    eval_input = input_fn(
        eval_data_paths,
        batch_size=eval_batch_size,
        mode=tf.contrib.learn.ModeKeys.EVAL)
    return Experiment(
        model.build_estimator(
            output_dir,
            hidden_units=hidden_units
        ),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        export_strategies=[saved_model_export_utils.make_export_strategy(
            model.serving_input_fn,
            default_output_alternative_key=None,
            exports_to_keep=1
        )],
        eval_metrics=model.get_eval_metrics(),
        # min_eval_frequency=1000,  # change this to speed up training on large datasets
        **experiment_args
    )
def _experiment_fn(output_dir):
    input_fn = (model.generate_csv_input_fn
                if format == 'csv' else model.generate_tfrecord_input_fn)
    train_input = input_fn(
        train_data_paths,
        num_epochs=num_epochs,
        batch_size=train_batch_size)
    eval_input = input_fn(
        eval_data_paths,
        batch_size=eval_batch_size,
        mode=tf.contrib.learn.ModeKeys.EVAL)
    return Experiment(
        model.build_estimator(
            output_dir,
            nbuckets=nbuckets,
            hidden_units=parse_to_int(hidden_units)
        ),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        export_strategies=[saved_model_export_utils.make_export_strategy(
            model.serving_input_fn,
            default_output_alternative_key=None,
            exports_to_keep=1
        )],
        eval_metrics=model.get_eval_metrics(),
        # min_eval_frequency=1000,  # change this to speed up training on large datasets
        **experiment_args
    )
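# parse_to_int above is referenced but not shown. A minimal sketch, assuming
# hidden_units arrives as a comma- or space-separated string such as
# "128 32 4" passed on the command line (the contract is an assumption):
def parse_to_int(hidden_units):
    # "128 32 4" or "128,32,4" -> [128, 32, 4]
    return [int(n) for n in hidden_units.replace(',', ' ').split()]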
def _experiment_fn(output_dir):
    train_input = model.generate_input_fn(
        train_file,
        num_epochs=num_epochs,
        batch_size=train_batch_size)
    eval_input = model.generate_input_fn(
        eval_file, batch_size=eval_batch_size)
    return learn.Experiment(
        model.build_estimator(
            job_dir,
            embedding_size=embedding_size,
            hidden_units=hidden_units),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        eval_metrics={
            'training/hptuning/metric': learn.MetricSpec(
                metric_fn=metrics.streaming_accuracy,
                prediction_key='logits')
        },
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                model.serving_input_fn,
                default_output_alternative_key=None,
                exports_to_keep=1)
        ],
        **experiment_args)
parser.add_argument(
    '--job-dir',
    help='GCS location to write checkpoints and export models',
    required=True)
# Argument to turn on all logging
parser.add_argument(
    '--verbosity',
    choices=['DEBUG', 'ERROR', 'FATAL', 'INFO', 'WARN'],
    default='INFO',
)
args = parser.parse_args()

run_config = run_config.RunConfig(model_dir=args.job_dir)
estimator = model.build_estimator(run_config)


def prediction_input_fn():
    feature_placeholders = {
        'wvec': tf.placeholder(tf.float32, [1, 2, 3]),
        'dvec': tf.placeholder(tf.float32, [1, 2, 3]),
    }
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return tf.contrib.learn.InputFnOps(features, None, feature_placeholders)


predictor = from_contrib_estimator(estimator=estimator,
# df['signup_date'] = df['signup_date'].apply(lambda x: start + timedelta(days=x))
predict_df['last_service_use_date'] = predict_df[
    'last_service_use_date'].apply(lambda x: start + timedelta(days=x))
# df.rename(columns={'Unnamed: 0': 'user_id'}, inplace=True)

# Get user's recency
predict_df['recency'] = predict_df['last_service_use_date'].apply(
    lambda x: (predict_df.last_service_use_date.max() - x).days)

# Convert True/False to '1' & '0'
predict_df.loc[predict_df['business_service'] == True, 'business_service'] = '1'
predict_df.loc[predict_df['business_service'] == False, 'business_service'] = '0'

predict_df['is_retained'] = 0
# df.loc[df['last_service_use_date'].dt.month.isin([6,7]), 'is_retained'] = 1
predict_df.business_service = predict_df.business_service.astype(str)
predict_df.dropna(inplace=True)

m = build_estimator('model_dir')
predicted_values = list(m.predict(input_fn=lambda: input_fn(predict_df)))
probs = list(m.predict_proba(input_fn=lambda: input_fn(predict_df)))
predict_df['predicted_values'] = predicted_values
predict_df['probs'] = probs
predict_df.to_csv('predictions.csv')
def train_and_evaluate_model(config, hparams):
    """Runs the local training job given provided command line arguments.

    Args:
      config: RunConfig object
      hparams: dictionary passed by command line arguments
    """
    with open(hparams['train_file']) as f:
        if hparams['trainer_type'] == 'spam':
            contents, labels, _ = train_ml_helpers.spam_from_file(f)
        else:
            contents, labels = train_ml_helpers.component_from_file(f)

    logger.info('Training data received. Len: %d' % len(contents))

    # Generate features and targets from extracted contents and labels.
    if hparams['trainer_type'] == 'spam':
        features, targets = train_ml_helpers \
            .transform_spam_csv_to_features(contents, labels)
    else:
        top_list = top_words.make_top_words_list(contents, hparams['job_dir'])
        features, targets, index_to_component = train_ml_helpers \
            .transform_component_csv_to_features(contents, labels, top_list)

    # Split training and testing set.
    logger.info('Features generated')
    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2, random_state=42)

    # Generate TrainSpec and EvalSpec for train and evaluate.
    estimator = model.build_estimator(
        config=config,
        job_dir=hparams['job_dir'],
        trainer_type=hparams['trainer_type'],
        class_count=len(set(labels)))
    exporter = tf.estimator.LatestExporter(
        name='saved_model',
        serving_input_receiver_fn=generate_json_input_fn(
            hparams['trainer_type']))
    train_spec = tf.estimator.TrainSpec(
        input_fn=make_input_fn(
            hparams['trainer_type'],
            features_train, targets_train,
            num_epochs=hparams['num_epochs'],
            batch_size=hparams['train_batch_size']),
        max_steps=hparams['train_steps'])
    eval_spec = tf.estimator.EvalSpec(
        input_fn=make_input_fn(
            hparams['trainer_type'],
            features_test, targets_test,
            shuffle=False,
            batch_size=hparams['eval_batch_size']),
        exporters=exporter,
        steps=hparams['eval_steps'])

    if hparams['trainer_type'] == 'component':
        store_component_conversion(hparams['job_dir'], index_to_component)

    result = tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    logging.info(result)
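# make_input_fn above is referenced but not shown. A minimal sketch of one
# way to build it on top of tf.estimator.inputs.numpy_input_fn, assuming the
# features are already dense numpy arrays; the trainer_type parameter is
# kept only to match the call sites and is unused in this placeholder:
import numpy as np
import tensorflow as tf


def make_input_fn(trainer_type, features, targets,
                  num_epochs=None, batch_size=128, shuffle=True):
    return tf.estimator.inputs.numpy_input_fn(
        x={'input': np.asarray(features, dtype=np.float32)},
        y=np.asarray(targets),
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=shuffle)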