def launch_training(self, nb_steps: int):
    """Run a train-and-evaluate cycle for `nb_steps` training steps.

    Builds an estimator and its input functions from the pipeline config
    at `self.config_path`, checkpoints into `self.training_path`, and
    drives `train_and_evaluate`. Returns `self` so calls can be chained.
    """
    config = RunConfig(
        model_dir=str(self.training_path),
        save_checkpoints_steps=self.SAVE_CHECKPOINTS_STEPS,
        keep_checkpoint_max=2)

    bundle = create_estimator_and_inputs(
        run_config=config,
        pipeline_config_path=str(self.config_path))
    estimator = bundle["estimator"]

    # Export a serving signature ("Servo") alongside the final evaluation.
    exporter = FinalExporter(
        name="Servo",
        serving_input_receiver_fn=bundle["predict_input_fn"])

    train_spec = TrainSpec(bundle["train_input_fn"], nb_steps)
    eval_spec = EvalSpec(
        name="0",
        input_fn=bundle["eval_input_fns"][0],
        steps=None,
        exporters=exporter,
        throttle_secs=self.EVAL_EVERY_SECS,
    )

    train_and_evaluate(estimator, train_spec, eval_spec)
    return self
def _complete_flow(self,
                   train_distribute,
                   eval_distribute,
                   remote_cluster=None,
                   use_train_and_evaluate=True):
    """Exercise a full train/eval flow under distribution strategies.

    When `use_train_and_evaluate` is True the whole cycle runs through
    `train_and_evaluate`; otherwise train and evaluate are invoked
    separately and the eval result is exported by hand via the private
    `_TrainingExecutor._Evaluator` machinery. Returns the estimator.
    """
    estimator = self._get_estimator(train_distribute, eval_distribute,
                                    remote_cluster)

    input_dimension = LABEL_DIMENSION
    train_input_fn = self.dataset_input_fn(
        x={"x": DATA},
        y=DATA,
        batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync,
        shuffle=True)

    # Only shard the eval batch when an eval strategy is supplied.
    eval_batch_size = (
        BATCH_SIZE // eval_distribute.num_replicas_in_sync
        if eval_distribute else BATCH_SIZE)
    eval_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

    linear_feature_columns = [
        tf.feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        tf.feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    eval_spec = estimator_training.EvalSpec(
        name=EVAL_NAME,
        input_fn=eval_input_fn,
        steps=None,
        exporters=self._get_exporter(EXPORTER_NAME, feature_columns),
        start_delay_secs=0,
        throttle_secs=1)

    if use_train_and_evaluate:
        estimator_training.train_and_evaluate(
            estimator,
            estimator_training.TrainSpec(train_input_fn, max_steps=MAX_STEPS),
            eval_spec)
    else:
        estimator.train(train_input_fn, max_steps=MAX_STEPS)

        latest_ckpt_path = estimator.latest_checkpoint()
        metrics = estimator.evaluate(
            eval_input_fn, checkpoint_path=latest_ckpt_path, name=EVAL_NAME)

        # Export the eval result to files, mirroring what the training
        # executor's evaluator would do after a real evaluation.
        eval_result = estimator_training._EvalResult(
            status=estimator_training._EvalStatus.EVALUATED,
            metrics=metrics,
            checkpoint_path=latest_ckpt_path)
        evaluator = estimator_training._TrainingExecutor._Evaluator(
            estimator, eval_spec, None)
        evaluator._export_eval_result(eval_result, True)

    return estimator
def test_estimator_with_strategy_hooks(self, distribution,
                                       use_train_and_evaluate):
    """Checks that training/evaluation hooks run under an eval strategy."""
    config = run_config.RunConfig(eval_distribute=distribution)

    def _input_map_fn(tensor):
        return {'feature': tensor}, tensor

    def input_fn():
        return tf.data.Dataset.from_tensors(
            [1.]).repeat(10).batch(5).map(_input_map_fn)

    def model_fn(features, labels, mode):
        del features, labels
        global_step = tf.compat.v1.train.get_global_step()

        def _make_hooks():
            # One real hook plus one mock wrapping a no-op hook, so the
            # test can observe that strategy-wrapped hooks actually fire.
            counter = tf.compat.v1.train.StepCounterHook(
                every_n_steps=1, output_dir=self.get_temp_dir())
            mocked = tf.compat.v1.test.mock.MagicMock(
                wraps=tf.compat.v1.train.SessionRunHook(),
                spec=tf.compat.v1.train.SessionRunHook)
            return [counter, mocked]

        if mode == model_fn_lib.ModeKeys.TRAIN:
            return model_fn_lib.EstimatorSpec(
                mode,
                loss=tf.constant(1.),
                train_op=global_step.assign_add(1),
                training_hooks=_make_hooks())
        if mode == model_fn_lib.ModeKeys.EVAL:
            return model_fn_lib.EstimatorSpec(
                mode=mode,
                loss=tf.constant(1.),
                evaluation_hooks=_make_hooks())

    num_steps = 10
    estimator = estimator_lib.EstimatorV2(
        model_fn=model_fn, model_dir=self.get_temp_dir(), config=config)

    if use_train_and_evaluate:
        training.train_and_evaluate(
            estimator,
            training.TrainSpec(input_fn, max_steps=num_steps),
            training.EvalSpec(input_fn))
    else:
        estimator.train(input_fn, steps=num_steps)
        estimator.evaluate(input_fn, steps=num_steps)
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    """Runs train, evaluate, predict and SavedModel export end to end."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10

    data = np.linspace(
        0., 2., batch_size * label_dimension,
        dtype=np.float32).reshape(batch_size, label_dimension)

    # Each replica sees an equal slice of the global batch.
    per_replica_batch = batch_size // distribution.num_replicas_in_sync
    train_input_fn = self.dataset_input_fn(
        x={'x': data}, y=data, batch_size=per_replica_batch, shuffle=True)
    eval_input_fn = self.dataset_input_fn(
        x={'x': data}, y=data, batch_size=per_replica_batch, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer='Adagrad',
        linear_optimizer='Adagrad',
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    if use_train_and_evaluate:
        scores, _ = training.train_and_evaluate(
            estimator,
            training.TrainSpec(train_input_fn, max_steps=num_steps),
            training.EvalSpec(eval_input_fn))
    else:
        estimator.train(train_input_fn, steps=num_steps)
        scores = estimator.evaluate(eval_input_fn)

    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', scores)

    predictions = np.array([
        p[prediction_keys.PredictionKeys.PREDICTIONS]
        for p in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    export_dir = estimator.export_saved_model(tempfile.mkdtemp(),
                                              serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))