def test_train_and_evaluate(self):
  """Runs `train_and_evaluate()` per test estimator and checks the counters.

  For every estimator from `_estimators_for_tests()`, asserts that
  `fit_count`, `eval_count` and `export_count` are each 1, that exactly one
  entry is in `est.monitors` and it is a `SessionRunHook`, and that
  `est.eval_hooks` equals the hooks handed to the experiment.
  """
  for est in self._estimators_for_tests():
    # Core estimators are given neither `eval_metrics` nor an export input.
    is_core = isinstance(est, core_estimator.Estimator)
    eval_metrics = None if is_core else 'eval_metrics'
    noop_hook = _NoopHook()
    export_strategy = saved_model_export_utils.make_export_strategy(
        est, None if is_core else 'export_input', exports_to_keep=None)
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[noop_hook],
        train_steps=100,
        eval_steps=100,
        export_strategies=export_strategy)

    ex.train_and_evaluate()

    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, est.export_count)
    self.assertEqual(1, len(est.monitors))
    self.assertEqual([noop_hook], est.eval_hooks)
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(est.monitors[0], session_run_hook.SessionRunHook)
def test_continuous_train_and_eval(self):
  """Checks the counters after a single `continuous_train_and_eval()` call.

  Each test estimator is created with `eval_dict={'global_step': 100}` and the
  experiment is given `train_steps=100`. Afterwards `fit_count`, `eval_count`
  and `export_count` must each be 1 and the eval hooks must be unchanged.
  """
  for estimator in self._estimators_for_tests(
      eval_dict={'global_step': 100}):
    uses_core = isinstance(estimator, core_estimator.Estimator)
    # Core estimators get saving listeners but no eval metrics; the other
    # estimators get the reverse.
    saving_listeners = 'saving_listeners' if uses_core else None
    eval_metrics = None if uses_core else 'eval_metrics'
    export_input = None if uses_core else 'export_input'

    hook = _NoopHook()
    strategy = saved_model_export_utils.make_export_strategy(
        estimator, export_input, exports_to_keep=None)
    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[hook],
        train_steps=100,
        eval_steps=100,
        export_strategies=strategy,
        saving_listeners=saving_listeners)

    exp.continuous_train_and_eval()

    self.assertEqual(1, estimator.fit_count)
    self.assertEqual(1, estimator.eval_count)
    self.assertEqual(1, estimator.export_count)
    self.assertEqual([hook], estimator.eval_hooks)
def test_min_eval_frequency_defaults(self):
  """Checks that `_min_eval_frequency` is 1 when it is not passed explicitly.

  The experiment wraps a core `Estimator` built from a no-op model function
  and is constructed without a `min_eval_frequency` argument.
  """

  def dummy_model_fn(features, labels):  # pylint: disable=unused-argument
    pass

  estimator = core_estimator.Estimator(dummy_model_fn, '/tmp/dummy')
  ex = experiment.Experiment(
      estimator, train_input_fn=None, eval_input_fn=None)
  # `assertEquals` is a deprecated alias that was removed in Python 3.12;
  # `assertEqual` is the supported spelling.
  self.assertEqual(ex._min_eval_frequency, 1)
def test_run_std_server(self, mock_server):
  """Checks that `run_std_server()` starts and joins the mocked server.

  Args:
    mock_server: Mock whose recorded calls are inspected; presumably injected
      by a patch decorator that is outside this view -- confirm.
  """
  # Arrange.
  task = {'type': run_config_lib.TaskType.PS, 'index': 1}
  tf_config = {'cluster': self._cluster_spec(), 'task': task}
  environ = {'TF_CONFIG': json.dumps(tf_config)}
  # The RunConfig is built while TF_CONFIG is patched into the environment.
  with test.mock.patch.dict('os.environ', environ):
    config = run_config_lib.RunConfig(
        master='host2:2222', num_cores=15, gpu_memory_fraction=0.314)

  for estimator in self._estimators_for_tests(config):
    exp = experiment.Experiment(
        estimator, train_input_fn='train_input', eval_input_fn='eval_input')

    # Act.
    exp.run_std_server()

    # Assert.
    expected_calls = [test.mock.call().start(), test.mock.call().join()]
    mock_server.assert_has_calls(expected_calls)
def test_continuous_train_and_eval_with_predicate_fn(self):
  """Checks that a predicate returning False prevents any work.

  With a predicate that always returns False, `fit_count`, `eval_count` and
  `export_count` must all stay at 0 after `continuous_train_and_eval()`.
  """

  def _always_false(eval_result):
    del eval_result  # unused. for fn signature.
    return False

  for estimator in self._estimators_for_tests(
      eval_dict={'global_step': 100}):
    uses_core = isinstance(estimator, core_estimator.Estimator)
    eval_metrics = None if uses_core else 'eval_metrics'
    strategy = saved_model_export_utils.make_export_strategy(
        estimator,
        None if uses_core else 'export_input',
        exports_to_keep=None)
    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        # A value that would make `exp` never stop on its own.
        train_steps=100000000000,
        eval_steps=100,
        export_strategies=strategy)

    exp.continuous_train_and_eval(
        continuous_eval_predicate_fn=_always_false)

    self.assertEqual(0, estimator.fit_count)
    self.assertEqual(0, estimator.eval_count)
    self.assertEqual(0, estimator.export_count)
def test_continuous_eval_evaluates_checkpoint_once(self):
  """Checks that one checkpoint is evaluated once across 300 predicate calls.

  The predicate counts its invocations and how many of them received a truthy
  `eval_result`. It stops the loop on its 300th call. The test then expects
  exactly one evaluation and exactly one call with a truthy result.
  """
  for estimator in self._estimators_for_tests(
      eval_dict={'global_step': 100}):
    if isinstance(estimator, core_estimator.Estimator):
      eval_metrics = None
    else:
      eval_metrics = 'eval_metrics'
    estimator.fake_checkpoint()

    # A mutable dict lets the nested function update the counters.
    stats = {'called': 0, 'called_with_eval_result': 0}

    # pylint: disable=cell-var-from-loop
    def _predicate_fn(eval_result):
      stats['called'] += 1
      if eval_result:
        # If eval_result is not empty nor None, the checkpoint has been
        # evaluated.
        stats['called_with_eval_result'] += 1
      # With 300 times of evaluation, this should prove something.
      return stats['called'] < 300
    # pylint: enable=cell-var-from-loop

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0)
    exp.continuous_eval(
        evaluate_checkpoint_only_once=True,
        continuous_eval_predicate_fn=_predicate_fn)

    self.assertEqual(0, estimator.fit_count)
    self.assertEqual(1, estimator.eval_count)
    self.assertEqual(300, stats['called'])
    self.assertEqual(1, stats['called_with_eval_result'])
def test_continuous_eval_predicate_fn_with_checkpoint(self):
  """Checks a two-argument predicate that also receives the checkpoint path.

  The predicate asserts that `eval_result` and `checkpoint_path` are either
  both None or both not None. It lets the loop continue while fewer than
  three evaluations have run, so `eval_count` must end at 3.
  """
  for estimator in self._estimators_for_tests():
    uses_core = isinstance(estimator, core_estimator.Estimator)
    eval_metrics = None if uses_core else 'eval_metrics'
    estimator.fake_checkpoint()
    hook = _NoopHook()

    # pylint: disable=cell-var-from-loop
    def _predicate_fn(eval_result, checkpoint_path):
      result_missing = eval_result is None
      path_missing = checkpoint_path is None
      self.assertEqual(result_missing, path_missing)
      return estimator.eval_count < 3
    # pylint: enable=cell-var-from-loop

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[hook],
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0)
    exp.continuous_eval(
        evaluate_checkpoint_only_once=False,
        continuous_eval_predicate_fn=_predicate_fn)

    self.assertEqual(0, estimator.fit_count)
    self.assertEqual(3, estimator.eval_count)
    self.assertEqual([hook], estimator.eval_hooks)
def test_checkpoint_and_export(self):
  """Checks how often eval and export run with `checkpoint_and_export=True`.

  A `DNNClassifier` configured with `save_checkpoints_steps=3` is trained for
  8 steps, with `_maybe_export` and `_call_evaluate` replaced by mocks. Each
  mock must have been called 4 times.
  """
  import shutil  # Local import: only needed to remove the temp model dir.

  model_dir = tempfile.mkdtemp()
  # Delete the temporary model directory when the test ends so that repeated
  # runs do not leave checkpoint directories behind.
  self.addCleanup(shutil.rmtree, model_dir, ignore_errors=True)

  config = run_config_lib.RunConfig(save_checkpoints_steps=3)
  est = dnn.DNNClassifier(
      n_classes=3,
      feature_columns=[
          feature_column.real_valued_column('feature', dimension=4)
      ],
      hidden_units=[3, 3],
      model_dir=model_dir,
      config=config)

  exp_strategy = saved_model_export_utils.make_export_strategy(
      est, 'export_input', exports_to_keep=None)

  ex = experiment.Experiment(
      est,
      train_input_fn=test_data.iris_input_multiclass_fn,
      eval_input_fn=test_data.iris_input_multiclass_fn,
      export_strategies=(exp_strategy,),
      train_steps=8,
      checkpoint_and_export=True,
      eval_delay_secs=0)

  # The assertions stay inside the patches: once they exit, the attributes
  # are the real methods again and have no `call_count`.
  with test.mock.patch.object(ex, '_maybe_export'):
    with test.mock.patch.object(ex, '_call_evaluate'):
      ex.train_and_evaluate()
      # Eval and export are called after steps 1, 4, 7, and 8 (after training
      # is completed).
      self.assertEqual(ex._maybe_export.call_count, 4)
      self.assertEqual(ex._call_evaluate.call_count, 4)
def test_invalid_export_strategies(self):
  """Checks that values that are not export strategies are rejected.

  Constructing an `Experiment` with a plain string, or with a list holding a
  plain string, as `export_strategies` must raise a `ValueError` whose message
  matches 'ExportStrategy'.
  """
  for estimator in self._estimators_for_tests():
    # First a bare string, then a list wrapping a string.
    invalid_values = ('not_an_export_strategy', ['not_an_export_srategy'])
    for invalid in invalid_values:
      with self.assertRaisesRegexp(ValueError, 'ExportStrategy'):
        experiment.Experiment(
            estimator,
            train_input_fn='train_input',
            eval_input_fn='eval_input',
            train_steps=100,
            eval_steps=100,
            export_strategies=invalid)
def test_continuous_train_and_eval_with_invalid_predicate_fn(self):
  """Checks that a non-callable predicate raises a `ValueError`.

  Passing the string 'fn' as `continuous_eval_predicate_fn` must fail with a
  message matching '`continuous_eval_predicate_fn` must be a callable'.
  """
  expected_message = '`continuous_eval_predicate_fn` must be a callable'
  for estimator in self._estimators_for_tests():
    exp = experiment.Experiment(
        estimator, train_input_fn='train_input', eval_input_fn='eval_input')
    with self.assertRaisesRegexp(ValueError, expected_message):
      exp.continuous_train_and_eval(continuous_eval_predicate_fn='fn')
def test_continuous_train_and_eval_with_invalid_train_steps_iterations(self):
  """Checks that a string `train_steps_per_iteration` is rejected.

  Constructing an `Experiment` with `train_steps_per_iteration='123'` must
  raise a `ValueError` matching
  '`train_steps_per_iteration` must be an integer.'.
  """
  expected_message = '`train_steps_per_iteration` must be an integer.'
  for estimator in self._estimators_for_tests():
    with self.assertRaisesRegexp(ValueError, expected_message):
      experiment.Experiment(
          estimator,
          train_input_fn='train_input',
          eval_input_fn='eval_input',
          train_steps_per_iteration='123')
def test_run_std_server_raises_without_cluster_spec(self, mock_server):
  """Checks that `run_std_server()` raises when TF_CONFIG sets no cluster.

  The `RunConfig` is built with only a master address. Creating the experiment
  and calling `run_std_server()` must raise a `ValueError`.

  Args:
    mock_server: Not referenced in the body; presumably injected by a patch
      decorator that is outside this view -- confirm.
  """
  run_config_without_cluster = run_config_lib.RunConfig(master='host4:2222')
  for estimator in self._estimators_for_tests(run_config_without_cluster):
    with self.assertRaises(ValueError):
      exp = experiment.Experiment(
          estimator,
          train_input_fn='train_input',
          eval_input_fn='eval_input')
      exp.run_std_server()
def test_train_delay(self):
  """Checks the time recorded by a `SheepCounter` after a delayed `train()`.

  `time.time` and `time.sleep` are replaced by the counter's methods. After
  `train(delay_secs=d)` the counter's `time()` must be within 1e-4 of `d`,
  for delays of 0, 1 and 3.
  """
  for estimator in self._estimators_for_tests():
    exp = experiment.Experiment(
        estimator, train_input_fn='train_input', eval_input_fn='eval_input')
    for delay_secs in (0, 1, 3):
      clock = SheepCounter()
      time_patch = test.mock.patch.object(time, 'time', clock.time)
      sleep_patch = test.mock.patch.object(time, 'sleep', clock.sleep)
      with time_patch, sleep_patch:
        exp.train(delay_secs=delay_secs)
        self.assertAlmostEqual(delay_secs, clock.time(), delta=1e-4)
def test_eval_metrcis_for_core_estimator(self):
  """Checks that `eval_metrics` is rejected for a `TestCoreEstimator`.

  Constructing the experiment with a non-None `eval_metrics` must raise a
  `ValueError` matching '`eval_metrics` must be `None`'.
  """
  core_est = TestCoreEstimator()
  expected_message = '`eval_metrics` must be `None`'
  with self.assertRaisesRegexp(ValueError, expected_message):
    experiment.Experiment(
        core_est,
        train_input_fn='train_input',
        train_steps='train_steps',
        eval_input_fn='eval_input',
        eval_metrics='eval_metrics')
def test_train_server_does_not_start_without_cluster_spec(self, mock_server):
  """Checks that `train()` leaves the mocked server uncalled.

  The `RunConfig` is built with a master address only, with no TF_CONFIG
  patched into the environment.

  Args:
    mock_server: Mock whose `called` flag is inspected; presumably injected
      by a patch decorator that is outside this view -- confirm.
  """
  master_only_config = run_config_lib.RunConfig(master='host4:2222')
  for estimator in self._estimators_for_tests(master_only_config):
    exp = experiment.Experiment(
        estimator, train_input_fn='train_input', eval_input_fn='eval_input')
    exp.train()

    # The server should not have started because there was no ClusterSpec.
    self.assertFalse(mock_server.called)
def test_train_server_does_not_start_with_empty_master(self, mock_server):
  """Checks that `train()` leaves the mocked server uncalled for master=''.

  TF_CONFIG carries a cluster spec, but the `RunConfig` is created with an
  empty master string.

  Args:
    mock_server: Mock whose `called` flag is inspected; presumably injected
      by a patch decorator that is outside this view -- confirm.
  """
  environ = {'TF_CONFIG': json.dumps({'cluster': self._cluster_spec()})}
  # The RunConfig is built while TF_CONFIG is patched into the environment.
  with test.mock.patch.dict('os.environ', environ):
    config = run_config_lib.RunConfig(master='')

  for estimator in self._estimators_for_tests(config):
    exp = experiment.Experiment(
        estimator, train_input_fn='train_input', eval_input_fn='eval_input')
    exp.train()

    # The server should not have started because master was the empty string.
    self.assertFalse(mock_server.called)
def test_train_default_delay(self):
  """Checks the `SheepCounter` time after `train()` with no explicit delay.

  For task indices 0, 1 and 3 (set through TF_CONFIG), the counter's `time()`
  after `train()` must be within 1e-4 of `task_index * 5`.
  """
  for task_index in (0, 1, 3):
    environ = {'TF_CONFIG': json.dumps({'task': {'index': task_index}})}
    # The RunConfig is built while TF_CONFIG is patched into the environment.
    with test.mock.patch.dict('os.environ', environ):
      config = run_config.RunConfig()

    for estimator in self._estimators_for_tests(config):
      exp = experiment.Experiment(
          estimator, train_input_fn='train_input', eval_input_fn='eval_input')

      clock = SheepCounter()
      time_patch = test.mock.patch.object(time, 'time', clock.time)
      sleep_patch = test.mock.patch.object(time, 'sleep', clock.sleep)
      with time_patch, sleep_patch:
        exp.train()
        self.assertAlmostEqual(task_index * 5, clock.time(), delta=1e-4)
def test_evaluate_delay(self):
  """Checks the `SheepCounter` time after a delayed `evaluate()`.

  `time.time` and `time.sleep` are replaced by the counter's methods. After
  `evaluate(delay_secs=d)` the counter's `time()` must be within 1e-4 of `d`,
  and the estimator's eval hooks must equal the ones given to the experiment.
  """
  for estimator in self._estimators_for_tests():
    estimator.fake_checkpoint()
    hook = _NoopHook()
    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_hooks=[hook])

    for delay_secs in (0, 1, 3):
      clock = SheepCounter()
      time_patch = test.mock.patch.object(time, 'time', clock.time)
      sleep_patch = test.mock.patch.object(time, 'sleep', clock.sleep)
      with time_patch, sleep_patch:
        exp.evaluate(delay_secs=delay_secs)
        self.assertAlmostEqual(delay_secs, clock.time(), delta=1e-4)
        self.assertEqual([hook], estimator.eval_hooks)
def test_default_output_alternative_key_core_estimator(self):
  """Checks that `default_output_alternative_key` fails for a core estimator.

  The export strategy is created with `default_output_alternative_key` set.
  Running `train_and_evaluate()` on a `TestCoreEstimator` must then raise a
  `ValueError` matching 'default_output_alternative_key is not supported'.
  """
  core_est = TestCoreEstimator()
  strategy = saved_model_export_utils.make_export_strategy(
      core_est,
      default_output_alternative_key='export_key',
      exports_to_keep=None)
  exp = experiment.Experiment(
      core_est,
      train_input_fn='train_input',
      eval_input_fn='eval_input',
      train_steps=100,
      eval_steps=100,
      export_strategies=strategy)

  expected_message = 'default_output_alternative_key is not supported'
  with self.assertRaisesRegexp(ValueError, expected_message):
    exp.train_and_evaluate()
def test_evaluate(self):
  """Checks that `evaluate()` evaluates once and does not fit.

  After `evaluate()`, `fit_count` must be 0, `eval_count` must be 1 and the
  estimator's eval hooks must equal the ones given to the experiment.
  """
  for estimator in self._estimators_for_tests():
    if isinstance(estimator, core_estimator.Estimator):
      eval_metrics = None
    else:
      eval_metrics = 'eval_metrics'
    estimator.fake_checkpoint()
    hook = _NoopHook()

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[hook],
        eval_steps='steps',
        eval_delay_secs=0)
    exp.evaluate()

    self.assertEqual(0, estimator.fit_count)
    self.assertEqual(1, estimator.eval_count)
    self.assertEqual([hook], estimator.eval_hooks)
def test_train_and_evaluate_with_no_eval_during_training(self):
  """Checks `train_and_evaluate()` when `min_eval_frequency` is 0.

  One fit and one evaluation are expected, and `est.monitors` must be empty.
  """
  for estimator in self._estimators_for_tests():
    uses_core = isinstance(estimator, core_estimator.Estimator)
    eval_metrics = None if uses_core else 'eval_metrics'
    hook = _NoopHook()

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[hook],
        train_steps=100,
        eval_steps=100,
        min_eval_frequency=0)
    exp.train_and_evaluate()

    self.assertEqual(1, estimator.fit_count)
    self.assertEqual(1, estimator.eval_count)
    self.assertEqual(0, len(estimator.monitors))
def test_train(self):
  """Checks that `train()` fits once and forwards `train_steps`.

  The value returned by `train(delay_secs=0)` must contain the pair
  `('max_steps', 'train_steps')`. `fit_count` must be 1 and `eval_count` 0.
  """
  for estimator in self._estimators_for_tests():
    uses_core = isinstance(estimator, core_estimator.Estimator)
    # Core estimators get saving listeners but no eval metrics; the other
    # estimators get the reverse.
    eval_metrics = None if uses_core else 'eval_metrics'
    saving_listeners = 'saving_listeners' if uses_core else None

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        train_steps='train_steps',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        saving_listeners=saving_listeners)
    fit_args = exp.train(delay_secs=0)

    self.assertEqual(1, estimator.fit_count)
    self.assertIn(('max_steps', 'train_steps'), fit_args)
    self.assertEqual(0, estimator.eval_count)
def test_continuous_eval(self):
  """Checks `continuous_eval()` without a predicate or `train_steps`.

  The call is expected to end with `StopIteration`. By then `fit_count` must
  be 0 and `eval_count` must be 6, with the eval hooks unchanged.
  """
  for estimator in self._estimators_for_tests(
      eval_dict={'global_step': 100}):
    uses_core = isinstance(estimator, core_estimator.Estimator)
    eval_metrics = None if uses_core else 'eval_metrics'
    estimator.fake_checkpoint()
    hook = _NoopHook()

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[hook],
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0)
    with self.assertRaises(StopIteration):
      exp.continuous_eval(evaluate_checkpoint_only_once=False)

    self.assertEqual(0, estimator.fit_count)
    self.assertEqual(6, estimator.eval_count)
    self.assertEqual([hook], estimator.eval_hooks)
def test_export_strategies_reset(self):
  """Checks `reset_export_strategies()` with no argument and with a list.

  Three `train_and_evaluate()` runs are made: with one strategy, after a
  reset with no argument, and after a reset to two new strategies. The
  expected `export_count` values are 1, 1 and 3. The resets must return
  `[export_strategy_1]` and then `[]`.
  """
  for estimator in self._estimators_for_tests():
    uses_core = isinstance(estimator, core_estimator.Estimator)
    eval_metrics = None if uses_core else 'eval_metrics'

    first_strategy = saved_model_export_utils.make_export_strategy(
        estimator, None if uses_core else 'export_1', exports_to_keep=None)
    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        train_steps=100,
        eval_steps=100,
        export_strategies=(first_strategy,))
    exp.train_and_evaluate()
    self.assertEqual(1, estimator.export_count)

    # After reset with empty list (None), the count does not change and the
    # user provided export strategy list should remain intact.
    previous = exp.reset_export_strategies()
    exp.train_and_evaluate()
    self.assertAllEqual([first_strategy], previous)
    self.assertEqual(1, estimator.export_count)

    # After reset with list, the count should increase with the number of
    # items.
    second_strategy = saved_model_export_utils.make_export_strategy(
        estimator, None if uses_core else 'export_2', exports_to_keep=None)
    third_strategy = saved_model_export_utils.make_export_strategy(
        estimator, None if uses_core else 'export_3', exports_to_keep=None)
    previous = exp.reset_export_strategies(
        [second_strategy, third_strategy])
    exp.train_and_evaluate()
    self.assertAllEqual([], previous)
    self.assertEqual(3, estimator.export_count)
def test_continuous_eval_ends_after_train_step(self):
  """Checks that `continuous_eval()` returns after a single evaluation.

  The estimator is created with `eval_dict={'global_step': 100}` and the
  experiment with `train_steps=100`. Afterwards `eval_count` must be 1 and
  `fit_count` must be 0.
  """
  for estimator in self._estimators_for_tests(
      eval_dict={'global_step': 100}):
    if isinstance(estimator, core_estimator.Estimator):
      eval_metrics = None
    else:
      eval_metrics = 'eval_metrics'
    estimator.fake_checkpoint()
    hook = _NoopHook()

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[hook],
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0,
        train_steps=100)
    exp.continuous_eval()

    self.assertEqual(0, estimator.fit_count)
    self.assertEqual(1, estimator.eval_count)
    self.assertEqual([hook], estimator.eval_hooks)
def test_train_hooks_extend_does_not_mutate_input_hooks(self):
  """Checks that `extend_train_hooks()` leaves the caller's list untouched.

  After extending, `_train_monitors` must hold both hooks, while the list
  originally passed as `train_monitors` must still hold only the first one.
  """
  for estimator in self._estimators_for_tests():
    uses_core = isinstance(estimator, core_estimator.Estimator)
    eval_metrics = None if uses_core else 'eval_metrics'

    first_hook = _NoopHook()
    caller_hooks = [first_hook]
    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        train_monitors=caller_hooks)
    self.assertAllEqual([first_hook], exp._train_monitors)

    second_hook = _NoopHook()
    # Assert that the extend API mutates the hooks, but not the input hooks
    exp.extend_train_hooks([second_hook])
    self.assertAllEqual([first_hook, second_hook], exp._train_monitors)
    self.assertAllEqual([first_hook], caller_hooks)
def test_train_raises_if_job_name_is_missing(self):
  """Checks that a TF_CONFIG task without a 'type' leads to a `ValueError`.

  The error may come from building the `RunConfig` or from `train()`. Both
  happen inside the `assertRaises` block.
  """
  tf_config = {
      'cluster': self._cluster_spec(),
      'environment': run_config_lib.Environment.CLOUD,
      'task': {'index': 1},
  }
  environ = {'TF_CONFIG': json.dumps(tf_config)}
  with test.mock.patch.dict('os.environ', environ):
    with self.assertRaises(ValueError):
      # The master is normally selected by task type.
      config = run_config_lib.RunConfig(master='host3:2222')
      for estimator in self._estimators_for_tests(config):
        exp = experiment.Experiment(
            estimator,
            train_input_fn='train_input',
            eval_input_fn='eval_input')
        exp.train()
def test_train_starts_server(self, mock_server):
  """Checks that `train()` creates and starts the mocked server.

  The mock must have been called with the config's cluster spec, the WORKER
  job name, task index 1, a `ConfigProto` built from the `RunConfig` values
  and `start=False`, followed by a `start()` call. The `SheepCounter` time
  after `train(delay_secs=1)` must be within 1e-4 of 1.

  Args:
    mock_server: Mock whose recorded calls are inspected; presumably injected
      by a patch decorator that is outside this view -- confirm.
  """
  # Arrange.
  task = {'type': run_config_lib.TaskType.WORKER, 'index': 1}
  tf_config = {
      'cluster': self._cluster_spec(),
      'environment': run_config_lib.Environment.CLOUD,
      'task': task,
  }
  environ = {'TF_CONFIG': json.dumps(tf_config)}
  # The RunConfig is built while TF_CONFIG is patched into the environment.
  with test.mock.patch.dict('os.environ', environ):
    config = run_config_lib.RunConfig(
        master='host4:2222', num_cores=15, gpu_memory_fraction=0.314)

  for estimator in self._estimators_for_tests(config):
    exp = experiment.Experiment(
        estimator, train_input_fn='train_input', eval_input_fn='eval_input')

    # Act.
    # We want to make sure we discount the time it takes to start the server
    # in our accounting of the delay, so we set a small delay here.
    clock = SheepCounter()
    time_patch = test.mock.patch.object(time, 'time', clock.time)
    sleep_patch = test.mock.patch.object(time, 'sleep', clock.sleep)
    with time_patch, sleep_patch:
      exp.train(delay_secs=1)
      # Ensure that the delay takes into account the time to start server.
      self.assertAlmostEqual(1, clock.time(), delta=1e-4)

    # Assert.
    expected_proto = config_pb2.ConfigProto()
    expected_proto.inter_op_parallelism_threads = 15
    expected_proto.intra_op_parallelism_threads = 15
    expected_proto.gpu_options.per_process_gpu_memory_fraction = 0.314
    mock_server.assert_called_with(
        config.cluster_spec,
        job_name=run_config_lib.TaskType.WORKER,
        task_index=1,
        config=expected_proto,
        start=False)
    mock_server.assert_has_calls([test.mock.call().start()])
def test_run_local(self):
  """Runs `local_run()` once per test estimator and checks the counters.

  The experiment is created with `local_eval_frequency=10`. Afterwards
  `fit_count` and `eval_count` must each be 1, `est.monitors` must hold
  exactly one `SessionRunHook`, and `est.eval_hooks` must equal the hooks
  handed to the experiment.
  """
  for est in self._estimators_for_tests():
    # Core estimators are given no `eval_metrics`.
    is_core = isinstance(est, core_estimator.Estimator)
    eval_metrics = None if is_core else 'eval_metrics'
    noop_hook = _NoopHook()
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[noop_hook],
        train_steps=100,
        eval_steps=100,
        local_eval_frequency=10)

    ex.local_run()

    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, len(est.monitors))
    self.assertEqual([noop_hook], est.eval_hooks)
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(est.monitors[0], session_run_hook.SessionRunHook)
def test_test(self):
  """Checks that `Experiment.test()` fits, evaluates and exports once each.

  After `test()`, `fit_count`, `eval_count` and `export_count` must each be 1.
  """
  for estimator in self._estimators_for_tests():
    uses_core = isinstance(estimator, core_estimator.Estimator)
    strategy = saved_model_export_utils.make_export_strategy(
        estimator,
        None if uses_core else 'export_input',
        exports_to_keep=None)
    # Core estimators get saving listeners but no eval metrics; the other
    # estimators get the reverse.
    eval_metrics = None if uses_core else 'eval_metrics'
    saving_listeners = 'saving_listeners' if uses_core else None

    exp = experiment.Experiment(
        estimator,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        export_strategies=(strategy,),
        eval_metrics=eval_metrics,
        saving_listeners=saving_listeners)
    exp.test()

    self.assertEqual(1, estimator.fit_count)
    self.assertEqual(1, estimator.eval_count)
    self.assertEqual(1, estimator.export_count)