def testAllArgumentsSet(self):
    """Checks that a TrainSpec built with every argument exposes them back."""
    fake_hooks = [_FakeHook()]
    train_spec = training.TrainSpec(
        input_fn=lambda: 1,
        max_steps=2,
        hooks=fake_hooks,
    )

    self.assertEqual(1, train_spec.input_fn())
    self.assertEqual(2, train_spec.max_steps)
    # The stored hooks are compared against a tuple built from the input list.
    self.assertEqual(tuple(fake_hooks), train_spec.hooks)
def test_invalid_estimator(self):
    """Expects a TypeError when the executor is given a plain `object()`."""
    not_an_estimator = object()
    spec_for_train = training.TrainSpec(input_fn=lambda: 1)
    spec_for_eval = training.EvalSpec(input_fn=lambda: 1)

    with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG):
        training._TrainingExecutor(
            not_an_estimator, spec_for_train, spec_for_eval)
def testRequiredArgumentsSet(self):
    """Builds an executor from minimal specs and checks its `estimator`."""
    model_fn = lambda features: features
    real_estimator = estimator_lib.Estimator(model_fn=model_fn)
    spec_for_train = training.TrainSpec(input_fn=lambda: 1)
    spec_for_eval = training.EvalSpec(input_fn=lambda: 1)

    training_executor = training._TrainingExecutor(
        real_estimator, spec_for_train, spec_for_eval)

    self.assertEqual(real_estimator, training_executor.estimator)
def test_train_with_train_spec(self, mock_server, unused_mock_sleep):
    """Runs the task and checks server setup plus the `train` call arguments.

    `evaluate` and `export_savedmodel` on the mocked estimator must not be
    called.
    """
    estimator_mock = test.mock.Mock(spec=estimator_lib.Estimator)
    estimator_mock.config = self._run_config
    spec_for_train = training.TrainSpec(
        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
    eval_spec_mock = test.mock.Mock(spec=training.EvalSpec)
    server_instance = mock_server.return_value

    training_executor = training._TrainingExecutor(
        estimator_mock, spec_for_train, eval_spec_mock)
    self._run_task(training_executor)

    # The server must be constructed with start=False, and start() must have
    # been invoked on the returned instance.
    run_config = estimator_mock.config
    mock_server.assert_called_with(
        run_config.cluster_spec,
        job_name=run_config.task_type,
        task_index=run_config.task_id,
        config=test.mock.ANY,
        start=False)
    self.assertTrue(server_instance.start.called)

    estimator_mock.train.assert_called_with(
        input_fn=spec_for_train.input_fn,
        max_steps=spec_for_train.max_steps,
        hooks=spec_for_train.hooks)
    estimator_mock.evaluate.assert_not_called()
    estimator_mock.export_savedmodel.assert_not_called()
def test_invalid_eval_spec(self):
    """Expects a TypeError when the eval spec is a plain `object()`."""
    model_fn = lambda features: features
    real_estimator = estimator_lib.Estimator(model_fn=model_fn)
    spec_for_train = training.TrainSpec(input_fn=lambda: 1)
    not_an_eval_spec = object()

    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
        training._TrainingExecutor(
            real_estimator, spec_for_train, not_an_eval_spec)
def test_train_and_evaluate_args(self):
    """Checks the arguments `run_local` hands to `evaluate` and `train`."""
    estimator_mock = test.mock.Mock(
        spec=estimator_lib.Estimator, model_dir='path/')
    estimator_mock.latest_checkpoint.return_value = 'checkpoint_path/'

    spec_for_train = training.TrainSpec(
        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
    spec_for_eval = training.EvalSpec(
        input_fn=lambda: 1, steps=2, hooks=[_FakeHook()], name='local_eval')
    # Evaluation reports a global step equal to max_steps.
    estimator_mock.evaluate.return_value = {
        _GLOBAL_STEP_KEY: spec_for_train.max_steps,
    }

    training_executor = training._TrainingExecutor(
        estimator_mock, spec_for_train, spec_for_eval)
    training_executor.run_local()

    expected_eval_kwargs = dict(
        name=spec_for_eval.name,
        input_fn=spec_for_eval.input_fn,
        steps=spec_for_eval.steps,
        checkpoint_path='checkpoint_path/',
        hooks=spec_for_eval.hooks)
    estimator_mock.evaluate.assert_called_with(**expected_eval_kwargs)

    # Keyword arguments of the most recent train() call.
    train_kwargs = estimator_mock.train.call_args[1]
    # The last hook handed to train() is left out of the comparison.
    hooks_without_last = train_kwargs['hooks'][:-1]
    self.assertEqual(list(spec_for_train.hooks), list(hooks_without_last))
    self.assertEqual(spec_for_train.input_fn, train_kwargs['input_fn'])
    self.assertEqual(spec_for_train.max_steps, train_kwargs['max_steps'])
def test_runs_in_a_loop_until_max_steps(self):
    """Checks train, evaluate and export each run three times in `run_local`.

    The mocked `evaluate` reports global steps of max_steps - 100,
    max_steps - 50 and finally max_steps.
    """
    estimator_mock = test.mock.Mock(
        spec=estimator_lib.Estimator, model_dir='path/')
    estimator_mock.latest_checkpoint = self.unique_checkpoint_every_time_fn
    estimator_mock.times_export_fn_was_called = 0

    def export_fn(estimator, *args, **kwargs):
        # Only the call count matters; the remaining arguments are ignored.
        del args, kwargs
        estimator.times_export_fn_was_called += 1

    strategy = export_strategy_lib.ExportStrategy(
        name='see_whether_export_fn_is_called', export_fn=export_fn)

    spec_for_train = training.TrainSpec(
        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
    spec_for_eval = training.EvalSpec(
        input_fn=lambda: 1,
        hooks=[_FakeHook()],
        throttle_secs=100,
        export_strategies=strategy)

    # should be called 3 times.
    estimator_mock.evaluate.side_effect = [
        {_GLOBAL_STEP_KEY: spec_for_train.max_steps - steps_remaining}
        for steps_remaining in (100, 50, 0)
    ]

    training_executor = training._TrainingExecutor(
        estimator_mock, spec_for_train, spec_for_eval)
    training_executor.run_local()

    self.assertEqual(3, estimator_mock.train.call_count)
    self.assertEqual(3, estimator_mock.evaluate.call_count)
    self.assertEqual(3, estimator_mock.times_export_fn_was_called)
def test_errors_out_if_throttle_secs_is_zero(self):
    """Expects `run_local` to raise ValueError when `throttle_secs` is 0."""
    estimator_mock = test.mock.Mock(spec=estimator_lib.Estimator)
    spec_for_train = training.TrainSpec(input_fn=lambda: 1)
    spec_for_eval = training.EvalSpec(input_fn=lambda: 1, throttle_secs=0)
    training_executor = training._TrainingExecutor(
        estimator_mock, spec_for_train, spec_for_eval)

    with self.assertRaisesRegexp(ValueError, 'throttle_secs'):
        training_executor.run_local()
def test_send_stop_at_secs_to_train(self):
    """Checks the last hook given to `train` is a `_StopAtSecsHook`.

    The hook's `_stop_after_secs` must equal the eval spec's `throttle_secs`.
    """
    estimator_mock = test.mock.Mock(
        spec=estimator_lib.Estimator, model_dir='path/')
    estimator_mock.latest_checkpoint = self.unique_checkpoint_every_time_fn

    spec_for_train = training.TrainSpec(
        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
    spec_for_eval = training.EvalSpec(
        input_fn=lambda: 1, hooks=[_FakeHook()], throttle_secs=100)
    estimator_mock.evaluate.return_value = {
        _GLOBAL_STEP_KEY: spec_for_train.max_steps,
    }

    training_executor = training._TrainingExecutor(
        estimator_mock, spec_for_train, spec_for_eval)
    training_executor.run_local()

    # Keyword arguments of the most recent train() call.
    train_kwargs = estimator_mock.train.call_args[1]
    last_hook = train_kwargs['hooks'][-1]
    self.assertIsInstance(last_hook, training._StopAtSecsHook)
    self.assertEqual(spec_for_eval.throttle_secs, last_hook._stop_after_secs)
def _complete_flow(self, train_distribute, eval_distribute,
                   remote_cluster=None):
    """Runs `train_and_evaluate` on a fresh estimator and returns it.

    Args:
      train_distribute: forwarded to `self._get_estimator`; the number of its
        `worker_devices` divides BATCH_SIZE for the training input.
      eval_distribute: forwarded to `self._get_estimator`; when truthy, the
        number of its `worker_devices` divides BATCH_SIZE for the eval input,
        otherwise the full BATCH_SIZE is used.
      remote_cluster: forwarded to `self._get_estimator`.

    Returns:
      The estimator obtained from `self._get_estimator`.
    """
    estimator = self._get_estimator(
        train_distribute, eval_distribute, remote_cluster)
    input_dimension = LABEL_DIMENSION

    train_batch_size = BATCH_SIZE // len(train_distribute.worker_devices)
    train_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=train_batch_size, shuffle=True)

    eval_batch_size = (
        BATCH_SIZE // len(eval_distribute.worker_devices)
        if eval_distribute else BATCH_SIZE)
    eval_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

    column_shape = (input_dimension,)
    linear_feature_columns = [
        feature_column.numeric_column("x", shape=column_shape)
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=column_shape)
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    train_spec = estimator_training.TrainSpec(
        train_input_fn, max_steps=MAX_STEPS)
    exporter = self._get_exporter(EXPORTER_NAME, feature_columns)
    eval_spec = estimator_training.EvalSpec(
        name=EVAL_NAME,
        input_fn=eval_input_fn,
        steps=None,
        exporters=exporter,
        start_delay_secs=0,
        throttle_secs=1)

    estimator_training.train_and_evaluate(estimator, train_spec, eval_spec)
    return estimator
def test_runs_in_a_loop_until_max_steps(self):
    """Checks `run_local` calls train and evaluate three times each.

    The mocked `evaluate` reports global steps of max_steps - 100,
    max_steps - 50 and finally max_steps.
    """
    estimator_mock = test.mock.Mock(spec=estimator_lib.Estimator)
    spec_for_train = training.TrainSpec(
        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
    spec_for_eval = training.EvalSpec(
        input_fn=lambda: 1, hooks=[_FakeHook()], throttle_secs=100)

    # should be called 3 times.
    estimator_mock.evaluate.side_effect = [
        {_GLOBAL_STEP_KEY: spec_for_train.max_steps - steps_remaining}
        for steps_remaining in (100, 50, 0)
    ]

    training_executor = training._TrainingExecutor(
        estimator_mock, spec_for_train, spec_for_eval)
    training_executor.run_local()

    self.assertEqual(3, estimator_mock.train.call_count)
    self.assertEqual(3, estimator_mock.evaluate.call_count)
def _complete_flow(self, train_distribute, eval_distribute,
                   remote_cluster=None, use_train_and_evaluate=True):
    """Trains and evaluates a fresh estimator, then returns it.

    Args:
      train_distribute: forwarded to `self._get_estimator`; its
        `num_replicas_in_sync` divides BATCH_SIZE for the training input.
      eval_distribute: forwarded to `self._get_estimator`; when truthy, its
        `num_replicas_in_sync` divides BATCH_SIZE for the eval input,
        otherwise the full BATCH_SIZE is used.
      remote_cluster: forwarded to `self._get_estimator`.
      use_train_and_evaluate: when truthy, `train_and_evaluate` drives the
        run; otherwise `train` and `evaluate` are called directly and the
        eval result is exported through the private `_Evaluator`.

    Returns:
      The estimator obtained from `self._get_estimator`.
    """
    estimator = self._get_estimator(
        train_distribute, eval_distribute, remote_cluster)
    input_dimension = LABEL_DIMENSION

    train_batch_size = BATCH_SIZE // train_distribute.num_replicas_in_sync
    train_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=train_batch_size, shuffle=True)

    eval_batch_size = (
        BATCH_SIZE // eval_distribute.num_replicas_in_sync
        if eval_distribute else BATCH_SIZE)
    eval_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

    column_shape = (input_dimension,)
    linear_feature_columns = [
        feature_column.numeric_column("x", shape=column_shape)
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=column_shape)
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    exporter = self._get_exporter(EXPORTER_NAME, feature_columns)
    eval_spec = estimator_training.EvalSpec(
        name=EVAL_NAME,
        input_fn=eval_input_fn,
        steps=None,
        exporters=exporter,
        start_delay_secs=0,
        throttle_secs=1)

    if use_train_and_evaluate:
        train_spec = estimator_training.TrainSpec(
            train_input_fn, max_steps=MAX_STEPS)
        estimator_training.train_and_evaluate(estimator, train_spec, eval_spec)
        return estimator

    # Manual path: train, evaluate the latest checkpoint, then export.
    estimator.train(train_input_fn, max_steps=MAX_STEPS)
    latest_ckpt_path = estimator.latest_checkpoint()
    metrics = estimator.evaluate(
        eval_input_fn, checkpoint_path=latest_ckpt_path, name=EVAL_NAME)

    # Export the eval result to files.
    eval_result = estimator_training._EvalResult(
        status=estimator_training._EvalStatus.EVALUATED,
        metrics=metrics,
        checkpoint_path=latest_ckpt_path)
    evaluator_cls = estimator_training._TrainingExecutor._Evaluator
    evaluator = evaluator_cls(estimator, eval_spec, None)
    evaluator._export_eval_result(eval_result, True)
    return estimator
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    """Trains, evaluates, predicts with and exports a combined regressor.

    Args:
      distribution: passed as both `train_distribute` and `eval_distribute`;
        its `num_replicas_in_sync` divides the batch size of the train and
        eval input functions.
      use_train_and_evaluate: when truthy, `training.train_and_evaluate` is
        used; otherwise `estimator.train` and `estimator.evaluate` are called
        directly.
    """
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(
        0., 2., batch_size * label_dimension,
        dtype=np.float32).reshape(batch_size, label_dimension)

    per_replica_batch_size = batch_size // distribution.num_replicas_in_sync
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=per_replica_batch_size,
        shuffle=True)
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=per_replica_batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    column_shape = (input_dimension,)
    linear_feature_columns = [
        feature_column.numeric_column('x', shape=column_shape)
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=column_shape)
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    if use_train_and_evaluate:
        train_spec = training.TrainSpec(train_input_fn, max_steps=num_steps)
        eval_spec = training.EvalSpec(eval_input_fn)
        scores, _ = training.train_and_evaluate(
            estimator, train_spec, eval_spec)
    else:
        estimator.train(train_input_fn, steps=num_steps)
        scores = estimator.evaluate(eval_input_fn)

    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', scores)

    predictions_key = prediction_keys.PredictionKeys.PREDICTIONS
    predictions = np.array([
        prediction[predictions_key]
        for prediction in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    export_dir = estimator.export_saved_model(
        tempfile.mkdtemp(), serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    """Trains, evaluates, predicts with and exports a combined regressor.

    Args:
      distribution: passed as both `train_distribute` and `eval_distribute`;
        the number of its `worker_devices` divides the batch size of the
        train and eval input functions.
      use_train_and_evaluate: when truthy, `training.train_and_evaluate` is
        used; otherwise `estimator.train` and `estimator.evaluate` are called
        directly.
    """
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices))
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices))
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    session_config = config_pb2.ConfigProto(
        log_device_placement=True, allow_soft_placement=True)
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        dnn_optimizer=adam.Adam(0.001),
        linear_optimizer=adam.Adam(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution,
            eval_distribute=distribution,
            session_config=session_config))

    num_steps = 2
    if use_train_and_evaluate:
        scores, _ = training.train_and_evaluate(
            estimator,
            training.TrainSpec(train_input_fn, max_steps=num_steps),
            training.EvalSpec(eval_input_fn))
    else:
        estimator.train(train_input_fn, steps=num_steps)
        scores = estimator.evaluate(eval_input_fn)

    # Check membership on the mapping itself rather than on a one-shot key
    # iterator, so a failure message shows the actual scores.
    self.assertIn('loss', scores)

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    export_dir = estimator.export_savedmodel(
        tempfile.mkdtemp(), serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
def testInvalidHook(self):
    """Expects a TypeError when `hooks` contains an `_InvalidHook`."""
    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
        bad_hooks = [_InvalidHook()]
        training.TrainSpec(input_fn=lambda: 1, hooks=bad_hooks)
def testInvalidMaxStep(self):
    """Expects a ValueError when `max_steps` is 0."""
    constant_input_fn = lambda: 1
    with self.assertRaisesRegexp(ValueError, _INVALID_MAX_STEPS_MSG):
        training.TrainSpec(input_fn=constant_input_fn, max_steps=0)
def testInvalidInputFn(self):
    """Expects a TypeError when `input_fn` is a string."""
    not_callable = 'invalid'
    with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG):
        training.TrainSpec(input_fn=not_callable)
def testRequiredArgumentsSet(self):
    """Checks a TrainSpec built from `input_fn` alone.

    `max_steps` must be None and `hooks` must be empty.
    """
    train_spec = training.TrainSpec(input_fn=lambda: 1)

    self.assertEqual(1, train_spec.input_fn())
    self.assertIsNone(train_spec.max_steps)
    hook_count = len(train_spec.hooks)
    self.assertEqual(0, hook_count)