def test_min_eval_frequency_defaults(self):
  def dummy_model_fn(features, labels):  # pylint: disable=unused-argument
    pass

  # The default value when model_dir is on GCS is 1000.
  estimator = core_estimator.Estimator(dummy_model_fn, 'gs://dummy_bucket')
  ex = experiment.Experiment(
      estimator, train_input_fn=None, eval_input_fn=None)
  self.assertEquals(ex._min_eval_frequency, 1000)

  # The default value when model_dir is not on GCS is 1.
  estimator = core_estimator.Estimator(dummy_model_fn, '/tmp/dummy')
  ex = experiment.Experiment(
      estimator, train_input_fn=None, eval_input_fn=None)
  self.assertEquals(ex._min_eval_frequency, 1)

  # Make sure the default is not used when explicitly set.
  estimator = core_estimator.Estimator(dummy_model_fn, 'gs://dummy_bucket')
  ex = experiment.Experiment(
      estimator,
      min_eval_frequency=123,
      train_input_fn=None,
      eval_input_fn=None)
  self.assertEquals(ex._min_eval_frequency, 123)

  # Make sure the default is not used when explicitly set to 0.
  estimator = core_estimator.Estimator(dummy_model_fn, 'gs://dummy_bucket')
  ex = experiment.Experiment(
      estimator,
      min_eval_frequency=0,
      train_input_fn=None,
      eval_input_fn=None)
  self.assertEquals(ex._min_eval_frequency, 0)

def test_min_eval_frequency_defaults(self):
  def dummy_model_fn(features, labels):  # pylint: disable=unused-argument
    pass

  estimator = core_estimator.Estimator(dummy_model_fn, '/tmp/dummy')
  ex = experiment.Experiment(
      estimator, train_input_fn=None, eval_input_fn=None)
  self.assertEquals(ex._min_eval_frequency, 1)

def test_continuous_eval_predicate_fn_with_checkpoint(self):
  for est in self._estimators_for_tests():
    eval_metrics = 'eval_metrics' if not isinstance(
        est, core_estimator.Estimator) else None
    est.fake_checkpoint()
    noop_hook = _NoopHook()

    def _predicate_fn(eval_result, checkpoint_path):
      self.assertEqual(not eval_result, checkpoint_path is None)
      return est.eval_count < 3  # pylint: disable=cell-var-from-loop

    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[noop_hook],
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0)
    ex.continuous_eval(
        evaluate_checkpoint_only_once=False,
        continuous_eval_predicate_fn=_predicate_fn)
    self.assertEqual(0, est.fit_count)
    self.assertEqual(3, est.eval_count)
    self.assertEqual([noop_hook], est.eval_hooks)

def test_export_strategies_reset(self):
  est = TestEstimator()
  export_strategy_1 = saved_model_export_utils.make_export_strategy(
      est, 'export_input_1', exports_to_keep=None)

  ex = experiment.Experiment(
      est,
      train_input_fn='train_input',
      eval_input_fn='eval_input',
      eval_metrics='eval_metrics',
      train_steps=100,
      eval_steps=100,
      export_strategies=[export_strategy_1])
  ex.train_and_evaluate()
  self.assertEqual(1, est.export_count)

  # After reset with an empty list (None), the count does not change and the
  # user-provided export strategy list should remain intact.
  old_es = ex.reset_export_strategies()
  ex.train_and_evaluate()
  self.assertAllEqual([export_strategy_1], old_es)
  self.assertEqual(1, est.export_count)

  # After reset with a list, the count should increase with the number of
  # items.
  export_strategy_2 = saved_model_export_utils.make_export_strategy(
      est, 'export_input_2', exports_to_keep=None)
  export_strategy_3 = saved_model_export_utils.make_export_strategy(
      est, 'export_input_3', exports_to_keep=None)

  old_es = ex.reset_export_strategies(
      [export_strategy_2, export_strategy_3])
  ex.train_and_evaluate()
  self.assertAllEqual([], old_es)
  self.assertEqual(3, est.export_count)

def test_checkpoint_and_export(self):
  model_dir = tempfile.mkdtemp()
  config = run_config_lib.RunConfig(save_checkpoints_steps=3)
  est = dnn.DNNClassifier(
      n_classes=3,
      feature_columns=[
          feature_column.real_valued_column('feature', dimension=4)
      ],
      hidden_units=[3, 3],
      model_dir=model_dir,
      config=config)

  exp_strategy = saved_model_export_utils.make_export_strategy(
      est, 'export_input', exports_to_keep=None)

  ex = experiment.Experiment(
      est,
      train_input_fn=test_data.iris_input_multiclass_fn,
      eval_input_fn=test_data.iris_input_multiclass_fn,
      export_strategies=(exp_strategy,),
      train_steps=8,
      checkpoint_and_export=True,
      eval_delay_secs=0)

  with test.mock.patch.object(ex, '_maybe_export'):
    with test.mock.patch.object(ex, '_call_evaluate'):
      ex.train_and_evaluate()

  # Eval and export are called after steps 1, 4, 7, and 8 (after training
  # is completed).
  self.assertEqual(ex._maybe_export.call_count, 4)
  self.assertEqual(ex._call_evaluate.call_count, 4)

def test_continuous_eval_evaluates_checkpoint_once(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    eval_metrics = 'eval_metrics' if not isinstance(
        est, core_estimator.Estimator) else None
    est.fake_checkpoint()

    result = {
        'called': 0,
        'called_with_eval_result': 0,
    }

    # pylint: disable=cell-var-from-loop
    def _predicate_fn(eval_result):
      result['called'] += 1
      if eval_result:
        # If eval_result is neither empty nor None, the checkpoint has been
        # evaluated.
        result['called_with_eval_result'] += 1
      # Calling the predicate 300 times is enough to show that the
      # checkpoint is evaluated only once.
      return result['called'] < 300
    # pylint: enable=cell-var-from-loop

    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_delay_secs=0,
        continuous_eval_throttle_secs=0)
    ex.continuous_eval(
        evaluate_checkpoint_only_once=True,
        continuous_eval_predicate_fn=_predicate_fn)

    self.assertEqual(0, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(300, result['called'])
    self.assertEqual(1, result['called_with_eval_result'])

def test_run_std_server(self, mock_server):
  # Arrange.
  tf_config = {
      'cluster': self._cluster_spec(),
      'task': {
          'type': run_config_lib.TaskType.PS,
          'index': 1
      }
  }
  with test.mock.patch.dict('os.environ',
                            {'TF_CONFIG': json.dumps(tf_config)}):
    config = run_config_lib.RunConfig(
        master='host2:2222',
        num_cores=15,
        gpu_memory_fraction=0.314)

  for est in self._estimators_for_tests(config):
    ex = experiment.Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')

    # Act.
    ex.run_std_server()

    # Assert.
    mock_server.assert_has_calls(
        [test.mock.call().start(), test.mock.call().join()])

def test_train_and_evaluate(self):
  for est in self._estimators_for_tests():
    eval_metrics = 'eval_metrics' if not isinstance(
        est, core_estimator.Estimator) else None
    noop_hook = _NoopHook()
    export_strategy = saved_model_export_utils.make_export_strategy(
        est,
        None if isinstance(est, core_estimator.Estimator) else 'export_input',
        exports_to_keep=None)
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[noop_hook],
        train_steps=100,
        eval_steps=100,
        export_strategies=export_strategy)
    ex.train_and_evaluate()
    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, est.export_count)
    self.assertEqual(1, len(est.monitors))
    self.assertEqual([noop_hook], est.eval_hooks)
    self.assertTrue(
        isinstance(est.monitors[0], session_run_hook.SessionRunHook))

def test_run_std_server_raises_without_cluster_spec(self, mock_server):
  config = run_config_lib.RunConfig(master='host4:2222')
  with self.assertRaises(ValueError):
    ex = experiment.Experiment(
        TestEstimator(config),
        train_input_fn='train_input',
        eval_input_fn='eval_input')
    ex.run_std_server()

def test_continuous_train_and_eval(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    if isinstance(est, core_estimator.Estimator):
      eval_metrics = None
      saving_listeners = 'saving_listeners'
    else:
      eval_metrics = 'eval_metrics'
      saving_listeners = None

    noop_hook = _NoopHook()
    export_strategy = saved_model_export_utils.make_export_strategy(
        est,
        None if isinstance(est, core_estimator.Estimator) else 'export_input',
        exports_to_keep=None)
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        eval_hooks=[noop_hook],
        train_steps=100,
        eval_steps=100,
        export_strategies=export_strategy,
        saving_listeners=saving_listeners)
    ex.continuous_train_and_eval()
    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, est.export_count)
    self.assertEqual([noop_hook], est.eval_hooks)

def test_continuous_train_and_eval_with_predicate_fn(self):
  for est in self._estimators_for_tests(eval_dict={'global_step': 100}):
    eval_metrics = 'eval_metrics' if not isinstance(
        est, core_estimator.Estimator) else None
    export_strategy = saved_model_export_utils.make_export_strategy(
        est,
        None if isinstance(est, core_estimator.Estimator) else 'export_input',
        exports_to_keep=None)
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics,
        train_steps=100000000000,  # a value so large that `ex` never stops on its own.
        eval_steps=100,
        export_strategies=export_strategy)

    def predicate_fn(eval_result):
      del eval_result  # unused; kept for the fn signature.
      return False

    ex.continuous_train_and_eval(continuous_eval_predicate_fn=predicate_fn)
    self.assertEqual(0, est.fit_count)
    self.assertEqual(0, est.eval_count)
    self.assertEqual(0, est.export_count)

def test_continuous_eval_evaluates_checkpoint_once(self):
  # Temporarily disabled until we figure out the threading story on Jenkins.
  return
  # pylint: disable=unreachable

  # The TestEstimator will raise StopIteration the second time evaluate is
  # called.
  ex = experiment.Experiment(
      TestEstimator(max_evals=1),
      train_input_fn='train_input',
      eval_input_fn='eval_input')

  # This should not happen if the logic restricting evaluation of the same
  # checkpoint works. We do need some checkpoint though, otherwise Experiment
  # will never evaluate.
  ex.estimator.fake_checkpoint()

  # Start a separate thread with continuous eval.
  thread = threading.Thread(
      target=lambda: ex.continuous_eval(delay_secs=0, throttle_delay_secs=0))
  thread.start()

  # The thread will die if it evaluates twice, and we should never evaluate
  # twice since we don't write another checkpoint. Since we did not enable
  # throttling, if it hasn't died after two seconds, we're good.
  thread.join(2)
  self.assertTrue(thread.is_alive())

  # But we should have evaluated once.
  count = ex.estimator.eval_count
  self.assertEquals(1, count)

def test_test(self):
  est = TestEstimator()
  ex = experiment.Experiment(
      est, train_input_fn='train_input', eval_input_fn='eval_input')
  ex.test()
  self.assertEquals(1, est.fit_count)
  self.assertEquals(1, est.eval_count)

def test_eval_metrics_for_core_estimator(self):
  est = TestCoreEstimator()
  with self.assertRaisesRegexp(
      ValueError, '`eval_metrics` must be `None`'):
    experiment.Experiment(
        est,
        train_input_fn='train_input',
        train_steps='train_steps',
        eval_input_fn='eval_input',
        eval_metrics='eval_metrics')

def test_continuous_train_and_eval_with_invalid_train_steps_iterations(self):
  for est in self._estimators_for_tests():
    with self.assertRaisesRegexp(
        ValueError, '`train_steps_per_iteration` must be an integer.'):
      experiment.Experiment(
          est,
          train_input_fn='train_input',
          eval_input_fn='eval_input',
          train_steps_per_iteration='123')

def test_train_delay(self):
  est = TestEstimator()
  ex = experiment.Experiment(
      est, train_input_fn='train_input', eval_input_fn='eval_input')
  for delay in [0, 1, 3]:
    with test.mock.patch('time.sleep', SheepCounter()) as sheep:
      ex.train(delay_secs=delay)
      self.assertAlmostEqual(delay, sheep.total_time, delta=0.1)

def test_continuous_train_and_eval_with_invalid_predicate_fn(self):
  for est in self._estimators_for_tests():
    ex = experiment.Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')
    with self.assertRaisesRegexp(
        ValueError, '`continuous_eval_predicate_fn` must be a callable'):
      ex.continuous_train_and_eval(continuous_eval_predicate_fn='fn')

def test_invalid_export_strategies(self):
  for est in self._estimators_for_tests():
    with self.assertRaisesRegexp(ValueError, 'ExportStrategy'):
      experiment.Experiment(
          est,
          train_input_fn='train_input',
          eval_input_fn='eval_input',
          train_steps=100,
          eval_steps=100,
          export_strategies='not_an_export_strategy')
    with self.assertRaisesRegexp(ValueError, 'ExportStrategy'):
      experiment.Experiment(
          est,
          train_input_fn='train_input',
          eval_input_fn='eval_input',
          train_steps=100,
          eval_steps=100,
          export_strategies=['not_an_export_strategy'])

def test_train_delay(self):
  for est in self._estimators_for_tests():
    ex = experiment.Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')
    for delay in [0, 1, 3]:
      sheep = SheepCounter()
      with test.mock.patch.object(time, 'time', sheep.time):
        with test.mock.patch.object(time, 'sleep', sheep.sleep):
          ex.train(delay_secs=delay)
          self.assertAlmostEqual(delay, sheep.time(), delta=1e-4)

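# NOTE: several of the delay tests in this excerpt patch `time.sleep` (and
# sometimes `time.time`) with a `SheepCounter` instance whose definition is
# not shown here. A minimal sketch of such a fake clock, assuming only the
# attributes these tests actually use (`sleep`, `time`, `total_time`, and
# being callable in place of `time.sleep`), could look like the following;
# the real helper may differ.
class SheepCounter(object):
  """Stand-in for time.sleep()/time.time() that only records virtual time."""

  def __init__(self):
    self._total_time = 0.0

  def __call__(self, secs):
    # Allows an instance to be patched directly over `time.sleep`.
    self.sleep(secs)

  def sleep(self, secs):
    # Record the requested sleep instead of actually sleeping.
    self._total_time += secs

  def time(self):
    # Report the accumulated virtual time.
    return self._total_time

  @property
  def total_time(self):
    return self._total_time
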
def test_train_server_does_not_start_without_cluster_spec(self, mock_server):
  config = run_config_lib.RunConfig(master='host4:2222')
  ex = experiment.Experiment(
      TestEstimator(config),
      train_input_fn='train_input',
      eval_input_fn='eval_input')
  ex.train()

  # The server should not have started because there was no ClusterSpec.
  self.assertFalse(mock_server.called)

def main(_):
  training_data_path, test_data_path = maybe_download_data(FLAGS.data_dir)

  # Load datasets.
  training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=training_data_path,
      target_dtype=np.int,
      features_dtype=np.float32)
  test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=test_data_path,
      target_dtype=np.int,
      features_dtype=np.float32)

  # Specify that all features have real-valued data.
  feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

  # Build a 3-layer DNN with 10, 20, 10 units respectively.
  model_dir = FLAGS.model_dir or tempfile.mkdtemp(prefix="debug_tflearn_iris_")
  classifier = tf.contrib.learn.DNNClassifier(
      feature_columns=feature_columns,
      hidden_units=[10, 20, 10],
      n_classes=3,
      model_dir=model_dir)

  hooks = (
      [tf_debug.LocalCLIDebugHook(ui_type=FLAGS.ui_type)]
      if FLAGS.debug else None)

  if not FLAGS.use_experiment:
    # Fit model.
    classifier.fit(
        x=training_set.data,
        y=training_set.target,
        steps=FLAGS.train_steps,
        monitors=hooks)

    # Evaluate accuracy.
    accuracy_score = classifier.evaluate(
        x=test_set.data, y=test_set.target, hooks=hooks)["accuracy"]
  else:
    ex = experiment.Experiment(
        classifier,
        train_input_fn=iris_input_fn,
        eval_input_fn=iris_input_fn,
        train_steps=FLAGS.train_steps,
        eval_delay_secs=0,
        eval_steps=1,
        train_monitors=hooks,
        eval_hooks=hooks)
    ex.train()
    accuracy_score = ex.evaluate()["accuracy"]

  print("After training %d steps, Accuracy = %f" %
        (FLAGS.train_steps, accuracy_score))

def test_train(self):
  est = TestEstimator()
  ex = experiment.Experiment(
      est,
      train_input_fn='train_input',
      train_steps='train_steps',
      eval_input_fn='eval_input',
      eval_metrics='eval_metrics')
  fit_args = ex.train(delay_secs=0)
  self.assertEquals(1, est.fit_count)
  self.assertIn(('max_steps', 'train_steps'), fit_args)
  self.assertEquals(0, est.eval_count)

def test_test(self):
  for est in self._estimators_for_tests():
    exp_strategy = saved_model_export_utils.make_export_strategy(
        est, 'export_input', exports_to_keep=None)
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        export_strategies=[exp_strategy])
    ex.test()
    self.assertEqual(1, est.fit_count)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(1, est.export_count)

def test_evaluate(self):
  est = TestEstimator()
  est.fake_checkpoint()
  ex = experiment.Experiment(
      est,
      train_input_fn='train_input',
      eval_input_fn='eval_input',
      eval_metrics='eval_metrics',
      eval_steps='steps',
      eval_delay_secs=0)
  ex.evaluate()
  self.assertEquals(1, est.eval_count)
  self.assertEquals(0, est.fit_count)

def test_train_server_does_not_start_with_empty_master(self, mock_server):
  tf_config = {'cluster': self._cluster_spec()}
  with test.mock.patch.dict('os.environ',
                            {'TF_CONFIG': json.dumps(tf_config)}):
    config = run_config_lib.RunConfig(master='')
  for est in self._estimators_for_tests(config):
    ex = experiment.Experiment(
        est, train_input_fn='train_input', eval_input_fn='eval_input')
    ex.train()
    # The server should not have started because master was the empty string.
    self.assertFalse(mock_server.called)

def test_train(self):
  for est in self._estimators_for_tests():
    eval_metrics = 'eval_metrics' if not isinstance(
        est, core_estimator.Estimator) else None
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        train_steps='train_steps',
        eval_input_fn='eval_input',
        eval_metrics=eval_metrics)
    fit_args = ex.train(delay_secs=0)
    self.assertEqual(1, est.fit_count)
    self.assertIn(('max_steps', 'train_steps'), fit_args)
    self.assertEqual(0, est.eval_count)

def test_evaluate_delay(self):
  est = TestEstimator()
  est.fake_checkpoint()
  noop_hook = _NoopHook()
  ex = experiment.Experiment(
      est,
      train_input_fn='train_input',
      eval_input_fn='eval_input',
      eval_hooks=[noop_hook])
  for delay in [0, 1, 3]:
    with test.mock.patch('time.sleep', SheepCounter()) as sheep:
      ex.evaluate(delay_secs=delay)
      self.assertAlmostEqual(delay, sheep.total_time, delta=0.1)
    self.assertEquals([noop_hook], est.eval_hooks)

def test_continuous_eval(self):
  est = TestEstimator()
  est.fake_checkpoint()
  ex = experiment.Experiment(
      est,
      train_input_fn='train_input',
      eval_input_fn='eval_input',
      eval_metrics='eval_metrics',
      eval_delay_secs=0,
      continuous_eval_throttle_secs=0)
  self.assertRaises(
      StopIteration, ex.continuous_eval, evaluate_checkpoint_only_once=False)
  self.assertEquals(6, est.eval_count)
  self.assertEquals(0, est.fit_count)

def test_fail_with_tpu_estimator(self):
  def dummy_model_fn(features, labels):
    del features, labels  # unused

  with self.assertRaisesRegexp(
      ValueError,
      '`Experiment` class cannot work with `tf.contrib.tpu.TPUEstimator`'):
    experiment.Experiment(
        tpu_estimator.TPUEstimator(
            model_fn=dummy_model_fn,
            config=tpu_config.RunConfig(),
            train_batch_size=256),
        train_input_fn='train_input',
        eval_input_fn='eval_input')

def test_evaluate_delay(self):
  for est in self._estimators_for_tests():
    est.fake_checkpoint()
    noop_hook = _NoopHook()
    ex = experiment.Experiment(
        est,
        train_input_fn='train_input',
        eval_input_fn='eval_input',
        eval_hooks=[noop_hook])

    for delay in [0, 1, 3]:
      sheep = SheepCounter()
      with test.mock.patch.object(time, 'time', sheep.time):
        with test.mock.patch.object(time, 'sleep', sheep.sleep):
          ex.evaluate(delay_secs=delay)
          self.assertAlmostEqual(delay, sheep.time(), delta=1e-4)
      self.assertEqual([noop_hook], est.eval_hooks)