def test_tpu_estimator_simple_lifecycle(self, use_tpu):
    """Runs a TPUEstimator through train, evaluate, predict and export.

    The estimator is built from a single `_DNNBuilder("dnn")` candidate,
    trained up to 300 steps on a four-example XOR dataset, evaluated for 10
    steps, optionally asked for predictions, and finally exported as a
    SavedModel into its own model directory.

    Args:
      use_tpu: Forwarded to the `_DNNBuilder` and to the `TPUEstimator`. The
        train batch size is 64 when it is truthy and 0 otherwise.
    """
    run_config = tf.contrib.tpu.RunConfig(master="", tf_random_seed=42)
    batch_size = 64 if use_tpu else 0
    estimator = TPUEstimator(
        head=tu.head(),
        subnetwork_generator=SimpleGenerator(
            [_DNNBuilder("dnn", use_tpu=use_tpu)]),
        max_iteration_steps=200,
        mixture_weight_initializer=tf.zeros_initializer(),
        use_bias=True,
        model_dir=self.test_subdirectory,
        config=run_config,
        use_tpu=use_tpu,
        train_batch_size=batch_size)
    total_steps = 300

    features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
    labels = [[1.], [0.], [1.], [0.]]
    xor_input_fn = tu.dummy_input_fn(features, labels)

    # Train.
    estimator.train(
        input_fn=xor_input_fn, steps=None, max_steps=total_steps, hooks=None)

    # Evaluate.
    metrics = estimator.evaluate(input_fn=xor_input_fn, steps=10, hooks=None)

    # Predict.
    # TODO: skip predictions on TF versions 1.11 and 1.12 since
    # some TPU hooks seem to be failing on predict.
    running_version = LooseVersion(tf.VERSION)
    skip_predict = (running_version == LooseVersion("1.11.0") or
                    running_version == LooseVersion("1.12.0"))
    if skip_predict:
        # Nothing to iterate over in the final assertion loop.
        predictions = []
    else:
        predictions = estimator.predict(
            input_fn=tu.dataset_input_fn(features=[0., 0.], labels=None))

    # Export SavedModel.
    def serving_input_fn():
        """Input fn for serving export, starting from serialized example."""
        # `shape=None` is what the original `shape=(None)` evaluates to: the
        # parentheses do not make a tuple, so the shape is left unspecified.
        receiver = tf.placeholder(
            dtype=tf.string, shape=None, name="serialized_example")
        served_features = {"x": tf.constant([[0., 0.]], name="serving_x")}
        return tf.estimator.export.ServingInputReceiver(
            features=served_features, receiver_tensors=receiver)

    # Use `export_saved_model` when the estimator exposes it as a callable,
    # otherwise fall back to `export_savedmodel`.
    preferred_export = getattr(estimator, "export_saved_model", None)
    export_fn = (preferred_export if callable(preferred_export)
                 else estimator.export_savedmodel)
    export_fn(
        export_dir_base=estimator.model_dir,
        serving_input_receiver_fn=serving_input_fn)

    self.assertAlmostEqual(0.32416, metrics["loss"], places=3)
    self.assertEqual(total_steps, metrics["global_step"])
    for item in predictions:
        self.assertIsNotNone(item["predictions"])
def test_tpu_estimator_summaries(self):
    """Checks which candidate summary directories hold summaries after a run.

    Trains a single-candidate `TPUEstimator` for at most 3 steps, evaluates it
    for 3 steps, and then asserts on `_summaries_exist` for the
    "candidate/t0_dnn" directory and its "eval" subdirectory.
    """
    config = tf.contrib.tpu.RunConfig(tf_random_seed=42)
    estimator = TPUEstimator(
        head=tu.head(),
        subnetwork_generator=SimpleGenerator([_DNNBuilder("dnn")]),
        max_iteration_steps=200,
        model_dir=self.test_subdirectory,
        config=config)
    # A single example with features [1., 0.] and label [1.].
    train_input_fn = tu.dummy_input_fn([[1., 0.]], [[1.]])

    # `fake_run_on_tpu` is defined elsewhere; presumably it makes training
    # behave as if it were running on a TPU -- confirm against the helper.
    # NOTE(review): the original layout was flattened, so it is an assumption
    # that only `train` (and not `evaluate`) sits inside this `with` block --
    # confirm against the file's history.
    with fake_run_on_tpu():
        estimator.train(input_fn=train_input_fn, max_steps=3)
    estimator.evaluate(input_fn=train_input_fn, steps=3)

    # Per `_summaries_exist`, nothing is expected directly under the candidate
    # directory, while its "eval" subdirectory is expected to have summaries.
    self.assertFalse(
        _summaries_exist(self.test_subdirectory + "/candidate/t0_dnn"))
    self.assertTrue(
        _summaries_exist(self.test_subdirectory + "/candidate/t0_dnn/eval"))
def train_and_evaluate_estimator():
    """Runs Estimator distributed training.

    Builds an `Estimator` over three `_DNNBuilder` candidates (layer sizes 3,
    4 and 5) with a regression head, then trains and evaluates it on a fixed
    four-example XOR dataset through `tf.estimator.train_and_evaluate`. The
    model directory is taken from the `MODEL_DIR` environment variable.

    Raises:
      KeyError: If `MODEL_DIR` is not present in the environment.
    """

    # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
    # variables during construction.
    # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
    # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
    run_config = tf.estimator.RunConfig(
        tf_random_seed=42, model_dir=os.environ["MODEL_DIR"])
    regression_head = tf.contrib.estimator.regression_head(
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

    # One builder per (name, layer size) pair, in this order.
    builders = [
        _DNNBuilder(builder_name, run_config, layer_size=width)
        for builder_name, width in (("dnn1", 3), ("dnn2", 4), ("dnn3", 5))
    ]
    estimator_options = {
        "max_iteration_steps": 100,
        "force_grow": True,
        "delay_secs_per_worker": .2,
        "max_worker_delay_secs": 1,
        "worker_wait_secs": .5,
        # Set low timeout to reduce wait time for failures.
        "worker_wait_timeout_secs": 60,
        "config": run_config,
    }
    estimator = Estimator(
        head=regression_head,
        subnetwork_generator=SimpleGenerator(builders),
        **estimator_options)

    def xor_input_fn():
        """Returns the XOR features dict (key "x") and the label tensor."""
        features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
        labels = [[1.], [0.], [1.], [0.]]
        feature_tensors = {"x": tf.constant(features, name="x")}
        label_tensor = tf.constant(labels, name="y")
        return feature_tensors, label_tensor

    # NOTE(review): the previous comment here said "Train for three
    # iterations", but max_steps=500 with max_iteration_steps=100 looks like
    # five iterations -- confirm which number is intended.
    training = tf.estimator.TrainSpec(input_fn=xor_input_fn, max_steps=500)
    evaluation = tf.estimator.EvalSpec(input_fn=xor_input_fn, steps=1)

    # Calling train_and_evaluate is the official way to perform distributed
    # training with an Estimator. Calling Estimator#train directly results
    # in an error when the TF_CONFIG is setup for a cluster.
    tf.estimator.train_and_evaluate(estimator, training, evaluation)
def train_and_evaluate_estimator():
    """Runs Estimator distributed training.

    The estimator that gets trained is selected by `FLAGS.estimator_type`:

    * "autoensemble": an `AutoEnsembleEstimator` whose candidate pool holds a
      linear estimator and two DNN estimators (hidden units [3] and [5]).
    * "estimator": an `Estimator` over three `_DNNBuilder` candidates (layer
      sizes 3, 4 and 5).

    Both share the same regression head, run config and worker timing options,
    and are trained and evaluated on a fixed four-example XOR dataset through
    `tf.estimator.train_and_evaluate`. The model directory comes from
    `FLAGS.model_dir`.

    Raises:
      ValueError: If `FLAGS.estimator_type` is not one of the values above.
        (Previously this surfaced later as an unbound-local error on
        `estimator`.)
    """

    # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
    # variables during construction.
    # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
    # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
    config = tf.estimator.RunConfig(
        tf_random_seed=42,
        model_dir=FLAGS.model_dir,
        session_config=tf.ConfigProto(
            log_device_placement=False,
            # Ignore other workers; only talk to parameter servers.
            # Otherwise, when a chief/worker terminates, the others will hang.
            device_filters=["/job:ps"]))
    head = tf.contrib.estimator.regression_head(
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

    # Options common to both estimator types.
    kwargs = {
        "max_iteration_steps": 100,
        "force_grow": True,
        "delay_secs_per_worker": .2,
        "max_worker_delay_secs": 1,
        "worker_wait_secs": .5,
        # Set low timeout to reduce wait time for failures.
        "worker_wait_timeout_secs": 60,
        "config": config
    }

    if FLAGS.estimator_type == "autoensemble":
        feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
        # Use the `tf.estimator` classes when they exist and fall back to the
        # `tf.contrib.estimator` ones otherwise.
        if hasattr(tf.estimator, "LinearEstimator"):
            linear_estimator_fn = tf.estimator.LinearEstimator
        else:
            linear_estimator_fn = tf.contrib.estimator.LinearEstimator
        if hasattr(tf.estimator, "DNNEstimator"):
            dnn_estimator_fn = tf.estimator.DNNEstimator
        else:
            dnn_estimator_fn = tf.contrib.estimator.DNNEstimator
        # Each candidate gets its own optimizer instance.
        candidate_pool = {
            "linear":
                linear_estimator_fn(
                    head=head,
                    feature_columns=feature_columns,
                    optimizer=tf.train.AdamOptimizer(learning_rate=.001)),
            "dnn":
                dnn_estimator_fn(
                    head=head,
                    feature_columns=feature_columns,
                    optimizer=tf.train.AdamOptimizer(learning_rate=.001),
                    hidden_units=[3]),
            "dnn2":
                dnn_estimator_fn(
                    head=head,
                    feature_columns=feature_columns,
                    optimizer=tf.train.AdamOptimizer(learning_rate=.001),
                    hidden_units=[5])
        }
        estimator = AutoEnsembleEstimator(
            head=head, candidate_pool=candidate_pool, **kwargs)
    elif FLAGS.estimator_type == "estimator":
        subnetwork_generator = SimpleGenerator([
            _DNNBuilder("dnn1", config, layer_size=3),
            _DNNBuilder("dnn2", config, layer_size=4),
            _DNNBuilder("dnn3", config, layer_size=5),
        ])
        estimator = Estimator(
            head=head, subnetwork_generator=subnetwork_generator, **kwargs)
    else:
        # Fail here with a clear message instead of leaving `estimator`
        # unbound and crashing at the train_and_evaluate call below.
        raise ValueError(
            "Unsupported estimator_type: {!r}; expected \"autoensemble\" or "
            "\"estimator\".".format(FLAGS.estimator_type))

    def input_fn():
        """Returns the XOR features dict (key "x") and the label tensor."""
        xor_features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
        xor_labels = [[1.], [0.], [1.], [0.]]
        input_features = {"x": tf.constant(xor_features, name="x")}
        input_labels = tf.constant(xor_labels, name="y")
        return input_features, input_labels

    # Train for three iterations.
    train_spec = tf.estimator.TrainSpec(input_fn=input_fn, max_steps=300)
    eval_spec = tf.estimator.EvalSpec(input_fn=input_fn, steps=1)

    # Calling train_and_evaluate is the official way to perform distributed
    # training with an Estimator. Calling Estimator#train directly results
    # in an error when the TF_CONFIG is setup for a cluster.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def test_tpu_estimator_summaries(self, use_tpu):
    """Compares values read back from event files after a short run.

    Trains a single-candidate `TPUEstimator` for at most 3 steps, evaluates it
    for 3 steps, and then checks what `_check_eventfile_for_keyword` returns
    for the model directory, its "eval" subdirectory and its
    "candidate/t0_dnn" subdirectory.

    Args:
      use_tpu: Forwarded to the `_DNNBuilder` and to the `TPUEstimator`. The
        train batch size is 64 when it is truthy and 0 otherwise.
    """
    run_config = tf.contrib.tpu.RunConfig(
        tf_random_seed=42, save_summary_steps=2, log_step_count_steps=1)
    assert run_config.log_step_count_steps
    batch_size = 64 if use_tpu else 0
    estimator = TPUEstimator(
        head=tu.head(),
        subnetwork_generator=SimpleGenerator(
            [_DNNBuilder("dnn", use_tpu=use_tpu)]),
        max_iteration_steps=200,
        model_dir=self.test_subdirectory,
        config=run_config,
        use_tpu=use_tpu,
        train_batch_size=batch_size)
    features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
    labels = [[1.], [0.], [1.], [0.]]
    xor_input_fn = tu.dummy_input_fn(features, labels)

    estimator.train(input_fn=xor_input_fn, max_steps=3)
    estimator.evaluate(input_fn=xor_input_fn, steps=3)

    # Directories whose event files are inspected below.
    root_dir = self.test_subdirectory
    eval_dir = os.path.join(root_dir, "eval")
    candidate_dir = os.path.join(root_dir, "candidate/t0_dnn")
    expected_loss = .5

    # Top-level train and eval summaries.
    self.assertAlmostEqual(
        expected_loss,
        _check_eventfile_for_keyword("loss", root_dir),
        places=1)
    self.assertIsNotNone(
        _check_eventfile_for_keyword("global_step/sec", root_dir))
    self.assertAlmostEqual(
        expected_loss,
        _check_eventfile_for_keyword("loss", eval_dir),
        places=1)
    self.assertEqual(
        0., _check_eventfile_for_keyword("iteration/adanet/iteration", root_dir))

    # Summaries under the candidate directory.
    self.assertAlmostEqual(
        3., _check_eventfile_for_keyword("scalar", candidate_dir), places=3)
    self.assertEqual(
        (3, 3, 1),
        _check_eventfile_for_keyword("image/image/0", candidate_dir))

    # Remaining candidate values are all compared to one decimal place.
    one_place_expectations = (
        (5., "nested/scalar"),
        (expected_loss, "adanet_loss/adanet/adanet_weighted_ensemble"),
        (0., "complexity_regularization/adanet/adanet_weighted_ensemble"),
        (1., "mixture_weight_norms/adanet/"
             "adanet_weighted_ensemble/subnetwork_0"),
    )
    for expected_value, keyword in one_place_expectations:
        self.assertAlmostEqual(
            expected_value,
            _check_eventfile_for_keyword(keyword, candidate_dir),
            places=1)
def test_tpu_estimator_summaries(self, use_tpu):
    """Compares summary values read back after a short train/evaluate run.

    Trains a single-candidate `TPUEstimator` for at most 3 steps, evaluates it
    for 3 steps, and then checks what `_get_summary_value` returns for the
    model directory, the "subnetwork/t0_dnn" and
    "ensemble/t0_dnn_grow_complexity_regularized" subdirectories, and their
    "eval" subdirectories.

    Args:
      use_tpu: Forwarded to the `_DNNBuilder` and to the `TPUEstimator`. The
        train batch size is 64 when it is truthy and 0 otherwise. It also
        gates one of the subnetwork eval assertions below.
    """
    config = tf.contrib.tpu.RunConfig(
        tf_random_seed=42, save_summary_steps=2, log_step_count_steps=1)
    assert config.log_step_count_steps
    estimator = TPUEstimator(
        head=tu.head(),
        subnetwork_generator=SimpleGenerator(
            [_DNNBuilder("dnn", use_tpu=use_tpu)]),
        max_iteration_steps=200,
        model_dir=self.test_subdirectory,
        config=config,
        use_tpu=use_tpu,
        train_batch_size=64 if use_tpu else 0)
    xor_features = [[1., 0.], [0., 0], [0., 1.], [1., 1.]]
    xor_labels = [[1.], [0.], [1.], [0.]]
    train_input_fn = tu.dummy_input_fn(xor_features, xor_labels)

    estimator.train(input_fn=train_input_fn, max_steps=3)
    estimator.evaluate(input_fn=train_input_fn, steps=3)

    # Directories whose summaries are inspected below.
    subnetwork_subdir = os.path.join(self.test_subdirectory,
                                     "subnetwork/t0_dnn")

    ensemble_loss = .5
    ensemble_subdir = os.path.join(
        self.test_subdirectory, "ensemble/t0_dnn_grow_complexity_regularized")

    # Summaries at the root of the model directory.
    self.assertAlmostEqual(
        ensemble_loss,
        _get_summary_value("loss", self.test_subdirectory),
        places=1)
    self.assertEqual(
        0.,
        _get_summary_value("iteration/adanet/iteration",
                           self.test_subdirectory))

    # Summaries under the subnetwork directory.
    self.assertAlmostEqual(
        3., _get_summary_value("scalar", subnetwork_subdir), places=3)
    self.assertEqual((3, 3, 1),
                     _get_summary_value("image/image/0", subnetwork_subdir))
    self.assertAlmostEqual(
        5., _get_summary_value("nested/scalar", subnetwork_subdir), places=3)

    # Summaries under the ensemble directory.
    self.assertAlmostEqual(
        ensemble_loss,
        _get_summary_value("adanet_loss/adanet/adanet_weighted_ensemble",
                           ensemble_subdir),
        places=1)
    self.assertAlmostEqual(
        0.,
        _get_summary_value(
            "complexity_regularization/adanet/adanet_weighted_ensemble",
            ensemble_subdir),
        places=1)
    self.assertAlmostEqual(
        1.,
        _get_summary_value(
            "mixture_weight_norms/adanet/"
            "adanet_weighted_ensemble/subnetwork_0", ensemble_subdir),
        places=1)

    # Eval metric summaries are always written out during eval.
    subnetwork_eval_subdir = os.path.join(subnetwork_subdir, "eval")
    # NOTE(review): the original layout was flattened, so it is an assumption
    # that only the "loss" assertion belongs to `if use_tpu:` (the TODO is
    # about the eval loss specifically) -- confirm against the file's history.
    if use_tpu:
        # TODO: Why is subnetwork eval loss 0.0 when use_tpu=False?
        self.assertAlmostEqual(
            ensemble_loss,
            _get_summary_value("loss", subnetwork_eval_subdir),
            places=1)
    self.assertAlmostEqual(
        ensemble_loss,
        _get_summary_value("average_loss", subnetwork_eval_subdir),
        places=1)

    eval_subdir = os.path.join(self.test_subdirectory, "eval")
    ensemble_eval_subdir = os.path.join(ensemble_subdir, "eval")
    for subdir in [ensemble_eval_subdir, eval_subdir]:
        self.assertEqual([b"| dnn |"],
                         _get_summary_value("architecture/adanet/ensembles/0",
                                            subdir))
        # NOTE(review): assumed that only the "loss" check is restricted to the
        # top-level eval directory and that "average_loss" is checked for both
        # directories -- confirm against the file's history.
        if subdir == eval_subdir:
            self.assertAlmostEqual(
                ensemble_loss, _get_summary_value("loss", subdir), places=1)
        self.assertAlmostEqual(
            ensemble_loss,
            _get_summary_value("average_loss", subdir),
            places=1)