Example #1
    def test_tpu_estimator_simple_lifecycle(self, use_tpu):
        config = tf.contrib.tpu.RunConfig(master="", tf_random_seed=42)
        estimator = TPUEstimator(
            head=tu.head(),
            subnetwork_generator=SimpleGenerator(
                [_DNNBuilder("dnn", use_tpu=use_tpu)]),
            max_iteration_steps=200,
            mixture_weight_initializer=tf.zeros_initializer(),
            use_bias=True,
            model_dir=self.test_subdirectory,
            config=config,
            use_tpu=use_tpu,
            train_batch_size=64 if use_tpu else 0)
        max_steps = 300

        xor_features = [[1., 0.], [0., 0.], [0., 1.], [1., 1.]]
        xor_labels = [[1.], [0.], [1.], [0.]]
        train_input_fn = tu.dummy_input_fn(xor_features, xor_labels)

        # Train.
        estimator.train(input_fn=train_input_fn,
                        steps=None,
                        max_steps=max_steps,
                        hooks=None)

        # Evaluate.
        eval_results = estimator.evaluate(input_fn=train_input_fn,
                                          steps=10,
                                          hooks=None)

        # Predict.
        # TODO: skip predictions on TF versions 1.11 and 1.12 since
        # some TPU hooks seem to be failing on predict.
        predictions = []
        tf_version = LooseVersion(tf.VERSION)
        if (tf_version != LooseVersion("1.11.0")
                and tf_version != LooseVersion("1.12.0")):
            predictions = estimator.predict(
                input_fn=tu.dataset_input_fn(features=[0., 0.], labels=None))

        # Export SavedModel.
        def serving_input_fn():
            """Input fn for serving export, starting from serialized example."""
            serialized_example = tf.placeholder(dtype=tf.string,
                                                shape=(None,),
                                                name="serialized_example")
            return tf.estimator.export.ServingInputReceiver(
                features={"x": tf.constant([[0., 0.]], name="serving_x")},
                receiver_tensors=serialized_example)

        export_saved_model_fn = getattr(estimator, "export_saved_model", None)
        if not callable(export_saved_model_fn):
            export_saved_model_fn = estimator.export_savedmodel
        export_saved_model_fn(export_dir_base=estimator.model_dir,
                              serving_input_receiver_fn=serving_input_fn)

        self.assertAlmostEqual(0.32416, eval_results["loss"], places=3)
        self.assertEqual(max_steps, eval_results["global_step"])
        for prediction in predictions:
            self.assertIsNotNone(prediction["predictions"])
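
A note on the helpers: tu is the adanet test-utilities module, whose input-fn helpers are not shown on this page. Below is a minimal sketch of what tu.dummy_input_fn might look like, assuming it simply wraps the features and labels in constant tensors (the real helper may differ):

import tensorflow as tf

def dummy_input_fn(features, labels):
    """Returns an input_fn that yields the given features and labels."""
    def input_fn(params=None):
        # TPUEstimator passes a `params` dict; it is unused in this sketch.
        del params
        return ({"x": tf.constant(features, name="x")},
                tf.constant(labels, name="y"))
    return input_fn
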
Example #2
  def test_tpu_estimator_summaries(self):
    config = tf.contrib.tpu.RunConfig(tf_random_seed=42)
    estimator = TPUEstimator(
        head=tu.head(),
        subnetwork_generator=SimpleGenerator([_DNNBuilder("dnn")]),
        max_iteration_steps=200,
        model_dir=self.test_subdirectory,
        config=config)
    train_input_fn = tu.dummy_input_fn([[1., 0.]], [[1.]])

    with fake_run_on_tpu():
      estimator.train(input_fn=train_input_fn, max_steps=3)
    estimator.evaluate(input_fn=train_input_fn, steps=3)

    self.assertFalse(
        _summaries_exist(self.test_subdirectory + "/candidate/t0_dnn"))
    self.assertTrue(
        _summaries_exist(self.test_subdirectory + "/candidate/t0_dnn/eval"))
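
Here fake_run_on_tpu is a test-only context manager that simulates running on a TPU, and _summaries_exist is a private helper; neither is shown on this page. A minimal sketch of _summaries_exist, assuming it only checks a directory for TensorFlow event files:

import os

def _summaries_exist(dir_path):
    """Returns True if dir_path contains any TensorFlow event files."""
    if not os.path.isdir(dir_path):
        return False
    return any(name.startswith("events.out.tfevents")
               for name in os.listdir(dir_path))
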
Example #3
def train_and_evaluate_estimator():
    """Runs Estimator distributed training."""

    # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
    # variables during construction.
    # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
    # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
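    # An example TF_CONFIG value is sketched after this function.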
    config = tf.estimator.RunConfig(tf_random_seed=42,
                                    model_dir=os.environ["MODEL_DIR"])
    head = tf.contrib.estimator.regression_head(
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

    subnetwork_generator = SimpleGenerator([
        _DNNBuilder("dnn1", config, layer_size=3),
        _DNNBuilder("dnn2", config, layer_size=4),
        _DNNBuilder("dnn3", config, layer_size=5),
    ])
    estimator = Estimator(
        head=head,
        subnetwork_generator=subnetwork_generator,
        max_iteration_steps=100,
        force_grow=True,
        delay_secs_per_worker=.2,
        max_worker_delay_secs=1,
        worker_wait_secs=.5,
        # Set low timeout to reduce wait time for failures.
        worker_wait_timeout_secs=60,
        config=config)

    def input_fn():
        xor_features = [[1., 0.], [0., 0.], [0., 1.], [1., 1.]]
        xor_labels = [[1.], [0.], [1.], [0.]]
        input_features = {"x": tf.constant(xor_features, name="x")}
        input_labels = tf.constant(xor_labels, name="y")
        return input_features, input_labels

    # Train for five iterations (max_steps=500 with max_iteration_steps=100).
    train_spec = tf.estimator.TrainSpec(input_fn=input_fn, max_steps=500)
    eval_spec = tf.estimator.EvalSpec(input_fn=input_fn, steps=1)

    # Calling train_and_evaluate is the official way to perform distributed
    # training with an Estimator. Calling Estimator#train directly results
    # in an error when TF_CONFIG is set up for a cluster.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
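
For reference, this is the kind of TF_CONFIG value a process in the cluster might export before running the function above. The cluster layout and host names are illustrative assumptions, not part of the example:

import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "chief": ["host0:2222"],   # Coordinates training, writes checkpoints.
        "worker": ["host1:2222"],  # Additional training replica.
        "ps": ["host2:2222"],      # Parameter server holding the variables.
    },
    # Identifies which task in the cluster this process is.
    "task": {"type": "chief", "index": 0},
})
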
Example #4
def train_and_evaluate_estimator():
    """Runs Estimator distributed training."""

    # The tf.estimator.RunConfig automatically parses the TF_CONFIG environment
    # variables during construction.
    # For more information on how tf.estimator.RunConfig uses TF_CONFIG, see
    # https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig.
    config = tf.estimator.RunConfig(
        tf_random_seed=42,
        model_dir=FLAGS.model_dir,
        session_config=tf.ConfigProto(
            log_device_placement=False,
            # Ignore other workers; only talk to parameter servers.
            # Otherwise, when a chief/worker terminates, the others will hang.
            device_filters=["/job:ps"]))
    head = tf.contrib.estimator.regression_head(
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

    kwargs = {
        "max_iteration_steps": 100,
        "force_grow": True,
        "delay_secs_per_worker": .2,
        "max_worker_delay_secs": 1,
        "worker_wait_secs": .5,
        # Set low timeout to reduce wait time for failures.
        "worker_wait_timeout_secs": 60,
        "config": config
    }
    if FLAGS.estimator_type == "autoensemble":
        feature_columns = [tf.feature_column.numeric_column("x", shape=[2])]
        if hasattr(tf.estimator, "LinearEstimator"):
            linear_estimator_fn = tf.estimator.LinearEstimator
        else:
            linear_estimator_fn = tf.contrib.estimator.LinearEstimator
        if hasattr(tf.estimator, "DNNEstimator"):
            dnn_estimator_fn = tf.estimator.DNNEstimator
        else:
            dnn_estimator_fn = tf.contrib.estimator.DNNEstimator
        candidate_pool = {
            "linear":
            linear_estimator_fn(
                head=head,
                feature_columns=feature_columns,
                optimizer=tf.train.AdamOptimizer(learning_rate=.001)),
            "dnn":
            dnn_estimator_fn(
                head=head,
                feature_columns=feature_columns,
                optimizer=tf.train.AdamOptimizer(learning_rate=.001),
                hidden_units=[3]),
            "dnn2":
            dnn_estimator_fn(
                head=head,
                feature_columns=feature_columns,
                optimizer=tf.train.AdamOptimizer(learning_rate=.001),
                hidden_units=[5])
        }

        estimator = AutoEnsembleEstimator(head=head,
                                          candidate_pool=candidate_pool,
                                          **kwargs)

    elif FLAGS.estimator_type == "estimator":
        subnetwork_generator = SimpleGenerator([
            _DNNBuilder("dnn1", config, layer_size=3),
            _DNNBuilder("dnn2", config, layer_size=4),
            _DNNBuilder("dnn3", config, layer_size=5),
        ])

        estimator = Estimator(head=head,
                              subnetwork_generator=subnetwork_generator,
                              **kwargs)

    def input_fn():
        xor_features = [[1., 0.], [0., 0.], [0., 1.], [1., 1.]]
        xor_labels = [[1.], [0.], [1.], [0.]]
        input_features = {"x": tf.constant(xor_features, name="x")}
        input_labels = tf.constant(xor_labels, name="y")
        return input_features, input_labels

    # Train for three iterations.
    train_spec = tf.estimator.TrainSpec(input_fn=input_fn, max_steps=300)
    eval_spec = tf.estimator.EvalSpec(input_fn=input_fn, steps=1)

    # Calling train_and_evaluate is the official way to perform distributed
    # training with an Estimator. Calling Estimator#train directly results
    # in an error when TF_CONFIG is set up for a cluster.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
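
Every example on this page depends on a private _DNNBuilder test helper. The sketch below shows a comparable adanet.subnetwork.Builder, assuming the adanet 0.x API on TensorFlow 1.x; the real _DNNBuilder also accepts config and use_tpu arguments and writes the summaries asserted on elsewhere on this page, and older adanet versions additionally require a build_mixture_weights_train_op method:

import adanet
import tensorflow as tf

class _SimpleDNNBuilder(adanet.subnetwork.Builder):
    """Sketch of a DNN subnetwork builder comparable to _DNNBuilder."""

    def __init__(self, name, layer_size=3):
        self._name = name
        self._layer_size = layer_size

    @property
    def name(self):
        return self._name

    def build_subnetwork(self, features, logits_dimension, training,
                         iteration_step, summary, previous_ensemble=None):
        # One hidden layer feeding a linear logits layer.
        hidden = tf.layers.dense(
            features["x"], units=self._layer_size, activation=tf.nn.relu)
        logits = tf.layers.dense(hidden, units=logits_dimension)
        return adanet.subnetwork.Subnetwork(
            last_layer=hidden,
            logits=logits,
            complexity=tf.constant(1.),
            persisted_tensors={})

    def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                                  iteration_step, summary, previous_ensemble):
        optimizer = tf.train.AdamOptimizer(learning_rate=.001)
        return optimizer.minimize(loss, var_list=var_list)
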
Example #5
    def test_tpu_estimator_summaries(self, use_tpu):
        config = tf.contrib.tpu.RunConfig(tf_random_seed=42,
                                          save_summary_steps=2,
                                          log_step_count_steps=1)
        assert config.log_step_count_steps
        estimator = TPUEstimator(head=tu.head(),
                                 subnetwork_generator=SimpleGenerator(
                                     [_DNNBuilder("dnn", use_tpu=use_tpu)]),
                                 max_iteration_steps=200,
                                 model_dir=self.test_subdirectory,
                                 config=config,
                                 use_tpu=use_tpu,
                                 train_batch_size=64 if use_tpu else 0)
        xor_features = [[1., 0.], [0., 0.], [0., 1.], [1., 1.]]
        xor_labels = [[1.], [0.], [1.], [0.]]
        train_input_fn = tu.dummy_input_fn(xor_features, xor_labels)

        estimator.train(input_fn=train_input_fn, max_steps=3)
        estimator.evaluate(input_fn=train_input_fn, steps=3)

        ensemble_loss = .5
        self.assertAlmostEqual(ensemble_loss,
                               _check_eventfile_for_keyword(
                                   "loss", self.test_subdirectory),
                               places=1)
        self.assertIsNotNone(
            _check_eventfile_for_keyword("global_step/sec",
                                         self.test_subdirectory))
        eval_subdir = os.path.join(self.test_subdirectory, "eval")
        self.assertAlmostEqual(ensemble_loss,
                               _check_eventfile_for_keyword(
                                   "loss", eval_subdir),
                               places=1)
        self.assertEqual(
            0.,
            _check_eventfile_for_keyword("iteration/adanet/iteration",
                                         self.test_subdirectory))

        candidate_subdir = os.path.join(self.test_subdirectory,
                                        "candidate/t0_dnn")
        self.assertAlmostEqual(3.,
                               _check_eventfile_for_keyword(
                                   "scalar", candidate_subdir),
                               places=3)
        self.assertEqual((3, 3, 1),
                         _check_eventfile_for_keyword("image/image/0",
                                                      candidate_subdir))
        self.assertAlmostEqual(5.,
                               _check_eventfile_for_keyword(
                                   "nested/scalar", candidate_subdir),
                               places=1)
        self.assertAlmostEqual(
            ensemble_loss,
            _check_eventfile_for_keyword(
                "adanet_loss/adanet/adanet_weighted_ensemble",
                candidate_subdir),
            places=1)
        self.assertAlmostEqual(
            0.,
            _check_eventfile_for_keyword(
                "complexity_regularization/adanet/adanet_weighted_ensemble",
                candidate_subdir),
            places=1)
        self.assertAlmostEqual(1.,
                               _check_eventfile_for_keyword(
                                   "mixture_weight_norms/adanet/"
                                   "adanet_weighted_ensemble/subnetwork_0",
                                   candidate_subdir),
                               places=1)
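
_check_eventfile_for_keyword is another private test helper. Below is a minimal sketch for the scalar case, assuming it scans the directory's event files with tf.train.summary_iterator; the real helper must also decode image and tensor summaries, which is how the (3, 3, 1) image-shape assertion above can work:

import os
import tensorflow as tf

def _check_eventfile_for_keyword(keyword, dir_path):
    """Returns the last scalar summary whose tag contains keyword."""
    result = None
    pattern = os.path.join(dir_path, "events.out.tfevents.*")
    for event_file in tf.gfile.Glob(pattern):
        for event in tf.train.summary_iterator(event_file):
            for value in event.summary.value:
                if keyword in value.tag:
                    result = value.simple_value
    return result
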
Example #6
    def test_tpu_estimator_summaries(self, use_tpu):
        config = tf.contrib.tpu.RunConfig(tf_random_seed=42,
                                          save_summary_steps=2,
                                          log_step_count_steps=1)
        assert config.log_step_count_steps
        estimator = TPUEstimator(head=tu.head(),
                                 subnetwork_generator=SimpleGenerator(
                                     [_DNNBuilder("dnn", use_tpu=use_tpu)]),
                                 max_iteration_steps=200,
                                 model_dir=self.test_subdirectory,
                                 config=config,
                                 use_tpu=use_tpu,
                                 train_batch_size=64 if use_tpu else 0)
        xor_features = [[1., 0.], [0., 0.], [0., 1.], [1., 1.]]
        xor_labels = [[1.], [0.], [1.], [0.]]
        train_input_fn = tu.dummy_input_fn(xor_features, xor_labels)

        estimator.train(input_fn=train_input_fn, max_steps=3)
        estimator.evaluate(input_fn=train_input_fn, steps=3)

        subnetwork_subdir = os.path.join(self.test_subdirectory,
                                         "subnetwork/t0_dnn")

        ensemble_loss = .5
        ensemble_subdir = os.path.join(
            self.test_subdirectory,
            "ensemble/t0_dnn_grow_complexity_regularized")

        self.assertAlmostEqual(ensemble_loss,
                               _get_summary_value("loss",
                                                  self.test_subdirectory),
                               places=1)
        self.assertEqual(
            0.,
            _get_summary_value("iteration/adanet/iteration",
                               self.test_subdirectory))
        self.assertAlmostEqual(3.,
                               _get_summary_value("scalar", subnetwork_subdir),
                               places=3)
        self.assertEqual((3, 3, 1),
                         _get_summary_value("image/image/0",
                                            subnetwork_subdir))
        self.assertAlmostEqual(5.,
                               _get_summary_value("nested/scalar",
                                                  subnetwork_subdir),
                               places=3)
        self.assertAlmostEqual(
            ensemble_loss,
            _get_summary_value("adanet_loss/adanet/adanet_weighted_ensemble",
                               ensemble_subdir),
            places=1)
        self.assertAlmostEqual(
            0.,
            _get_summary_value(
                "complexity_regularization/adanet/adanet_weighted_ensemble",
                ensemble_subdir),
            places=1)
        self.assertAlmostEqual(1.,
                               _get_summary_value(
                                   "mixture_weight_norms/adanet/"
                                   "adanet_weighted_ensemble/subnetwork_0",
                                   ensemble_subdir),
                               places=1)

        # Eval metric summaries are always written out during eval.
        subnetwork_eval_subdir = os.path.join(subnetwork_subdir, "eval")
        if use_tpu:
            # TODO: Why is subnetwork eval loss 0.0 when use_tpu=False?
            self.assertAlmostEqual(ensemble_loss,
                                   _get_summary_value("loss",
                                                      subnetwork_eval_subdir),
                                   places=1)
        self.assertAlmostEqual(ensemble_loss,
                               _get_summary_value("average_loss",
                                                  subnetwork_eval_subdir),
                               places=1)

        eval_subdir = os.path.join(self.test_subdirectory, "eval")
        ensemble_eval_subdir = os.path.join(ensemble_subdir, "eval")
        for subdir in [ensemble_eval_subdir, eval_subdir]:
            self.assertEqual([b"| dnn |"],
                             _get_summary_value(
                                 "architecture/adanet/ensembles/0", subdir))
            if subdir == eval_subdir:
                self.assertAlmostEqual(ensemble_loss,
                                       _get_summary_value("loss", subdir),
                                       places=1)
            self.assertAlmostEqual(ensemble_loss,
                                   _get_summary_value("average_loss", subdir),
                                   places=1)
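
_get_summary_value plays the same role as _check_eventfile_for_keyword in the previous example, but several assertions above read tensor-valued summaries (the architecture text summary yields [b"| dnn |"]). Below is a sketch of how a single summary value might be decoded, assuming plain protobuf field checks; the exact decoding in the real helper is not shown:

import tensorflow as tf

def _extract_summary_value(value):
    """Returns the payload of one tf.Summary.Value, scalar or tensor."""
    if value.HasField("simple_value"):
        return value.simple_value
    if value.HasField("tensor"):
        # Tensor summaries (e.g. text summaries) decode to numpy arrays.
        return tf.contrib.util.make_ndarray(value.tensor)
    return None
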