Example #1
    def test_iteration_metrics(self, use_tpu, mode):
        best_candidate_index = 3
        candidates = []
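        # Build ten candidates whose metric value equals their index, so the
        # reported metric reveals which candidate was selected.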
        for i in range(10):

            def metric_fn(val=i):
                return {"ensemble_metric": tf.metrics.mean(tf.constant(val))}

            spec = _EnsembleSpec(name="ensemble_{}".format(i),
                                 ensemble=None,
                                 architecture=None,
                                 subnetwork_builders=None,
                                 predictions=None,
                                 eval_metrics=(metric_fn, {}))
            candidate = _Candidate(ensemble_spec=spec,
                                   adanet_loss=tf.constant(i),
                                   is_training=tf.constant(False))
            candidates.append(candidate)
        metrics = _IterationMetrics(candidates, subnetwork_specs=[])

        with self.test_session() as sess:
            metrics_fn = (metrics.best_eval_metrics_tuple
                          if use_tpu else metrics.best_eval_metric_ops)
            actual = _run_metrics(
                sess,
                metrics_fn(tf.constant(best_candidate_index), mode) or {})

        if mode == tf.estimator.ModeKeys.EVAL:
            expected = {"ensemble_metric": best_candidate_index}
        else:
            expected = {}
        self.assertEqual(actual, expected)
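
The `_run_metrics` helper is used above but not shown. A minimal sketch of what it could look like, assuming TF1 metrics that come as (value_op, update_op) pairs (the implementation below is inferred, not from the source):

def _run_metrics(sess, metric_ops):
    # Each entry maps a metric name to a (value_op, update_op) pair, as
    # returned by tf.metrics.* functions. Metric accumulators are local
    # variables in TF1, so initialize them first, run every update op once,
    # then read off the value ops.
    sess.run(tf.local_variables_initializer())
    sess.run({name: op[1] for name, op in metric_ops.items()})
    return sess.run({name: op[0] for name, op in metric_ops.items()})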
Example #2
def create_iteration_metrics(subnetwork_metrics=None,
                             ensemble_metrics=None,
                             use_tpu=False,
                             iteration_number=1):
    """Creates an instance of the _IterationMetrics class.

    Args:
        subnetwork_metrics: List of _SubnetworkMetrics objects.
        ensemble_metrics: List of _EnsembleMetrics objects.
        use_tpu: Whether to use TPU-specific variable sharing logic.
        iteration_number: The iteration number these metrics belong to.

    Returns:
        An instance of _IterationMetrics populated with the input metrics.
    """
    subnetwork_metrics = subnetwork_metrics or []
    ensemble_metrics = ensemble_metrics or []

    candidates = []
    for i, metric in enumerate(ensemble_metrics):
        spec = _EnsembleSpec(name="ensemble_{}".format(i),
                             ensemble=None,
                             architecture=None,
                             subnetwork_builders=None,
                             predictions=None,
                             step=None,
                             variables=None,
                             eval_metrics=metric)

        candidate = _Candidate(ensemble_spec=spec,
                               adanet_loss=tf.constant(i),
                               variables=None)
        candidates.append(candidate)

    subnetwork_specs = []
    for i, metric in enumerate(subnetwork_metrics):
        spec = _SubnetworkSpec(name="subnetwork_{}".format(i),
                               subnetwork=None,
                               builder=None,
                               predictions=None,
                               step=None,
                               loss=None,
                               train_op=None,
                               asset_dir=None,
                               eval_metrics=metric,
                               variables=None)
        subnetwork_specs.append(spec)

    return _IterationMetrics(iteration_number,
                             candidates,
                             subnetwork_specs=subnetwork_specs,
                             use_tpu=use_tpu)
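
A hypothetical call for reference; `make_ensemble_metrics` is an illustrative stand-in for whatever produces _EnsembleMetrics objects in the test suite:

iteration_metrics = create_iteration_metrics(
    ensemble_metrics=[make_ensemble_metrics(), make_ensemble_metrics()],
    use_tpu=False,
    iteration_number=2)
eval_ops = iteration_metrics.best_eval_metric_ops(
    tf.constant(0), tf.estimator.ModeKeys.EVAL)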
Example #3
    def test_iteration_metrics(self, use_tpu, mode):
        with context.graph_mode():
            self.setup_graph()
            best_candidate_index = 3
            candidates = []
            for i in range(10):

                def metric_fn(val=i):
                    metric = tf.keras.metrics.Mean()
                    metric.update_state(tf.constant(val))
                    return {
                        "ensemble_v1_metric":
                            tf_compat.v1.metrics.mean(tf.constant(val)),
                        "ensemble_keras_metric": metric,
                    }

                spec = _EnsembleSpec(name="ensemble_{}".format(i),
                                     ensemble=None,
                                     architecture=None,
                                     subnetwork_builders=None,
                                     predictions=None,
                                     step=None,
                                     eval_metrics=(metric_fn, {}))
                candidate = _Candidate(ensemble_spec=spec,
                                       adanet_loss=tf.constant(i))
                candidates.append(candidate)
            metrics = _IterationMetrics(1, candidates, subnetwork_specs=[])

            metrics_fn = (metrics.best_eval_metrics_tuple
                          if use_tpu else metrics.best_eval_metric_ops)
            actual = self._run_metrics(
                metrics_fn(tf.constant(best_candidate_index), mode) or {})

            if mode == tf.estimator.ModeKeys.EVAL:
                expected = {
                    "ensemble_v1_metric": best_candidate_index,
                    "ensemble_keras_metric": best_candidate_index,
                    "iteration": 1
                }
            else:
                expected = {}
            self.assertEqual(actual, expected)
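
Mixing v1 and Keras metrics works here because, under TF1-style graph execution, a Keras metric exposes the same two handles as a tf.compat.v1.metrics function: update_state() returns an update op and result() returns the value tensor. A standalone illustration (not from the source):

mean = tf.keras.metrics.Mean()
update_op = mean.update_state(tf.constant(3.0))  # an update op in graph mode
value_op = mean.result()                         # tensor holding the mean
# (value_op, update_op) can then be treated like a tf.compat.v1.metrics pair.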
Example #4
  def build_iteration(self,
                      iteration_number,
                      ensemble_candidates,
                      subnetwork_builders,
                      features,
                      mode,
                      labels=None,
                      previous_ensemble_summary=None,
                      previous_ensemble_spec=None,
                      skip_summaries=False,
                      rebuilding=False):
    """Builds and returns AdaNet iteration t.

    This method uses the generated candidate subnetworks given the ensemble
    at iteration t-1 and creates graph operations to train them. The returned
    `_Iteration` tracks the training of all candidates to know when the
    iteration is over, and tracks the best candidate's predictions and loss, as
    defined by lowest complexity-regularized loss on the train set.

    Args:
      iteration_number: Integer iteration number.
      ensemble_candidates: Iterable of `adanet.ensemble.Candidate` instances.
      subnetwork_builders: A list of `Builders` for adding `Subnetworks` to the
        graph. Each subnetwork is then wrapped in a `_Candidate` to train.
      features: Dictionary of `Tensor` objects keyed by feature name.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      labels: `Tensor` of labels. Can be `None`.
      previous_ensemble_summary: The `adanet.Summary` for the previous ensemble.
      previous_ensemble_spec: Optional `_EnsembleSpec` for iteration t-1.
      skip_summaries: Whether to skip creating the summary ops when building
        the `_Iteration`.
      rebuilding: Boolean whether the iteration is being rebuilt only to restore
        the previous best subnetworks and ensembles.

    Returns:
      An _Iteration instance.

    Raises:
      ValueError: If subnetwork_builders is empty.
      ValueError: If two subnetworks share the same name.
      ValueError: If two ensembles share the same name.
    """

    tf.logging.info("%s iteration %s",
                    "Rebuilding" if rebuilding else "Building",
                    iteration_number)

    if not subnetwork_builders:
      raise ValueError("Each iteration must have at least one Builder.")

    # TODO: Consider moving builder mode logic to ensemble_builder.py.
    builder_mode = mode
    if rebuilding:
      # Build the subnetworks and ensembles in EVAL mode by default. This way
      # their outputs aren't affected by dropout etc.
      builder_mode = tf.estimator.ModeKeys.EVAL
      if mode == tf.estimator.ModeKeys.PREDICT:
        builder_mode = mode

      # Only replicate in training mode when the user requests it.
      if self._replicate_ensemble_in_training and (
          mode == tf.estimator.ModeKeys.TRAIN):
        builder_mode = mode

    features, labels = self._check_numerics(features, labels)

    training = mode == tf.estimator.ModeKeys.TRAIN
    skip_summaries = mode == tf.estimator.ModeKeys.PREDICT
    with tf.variable_scope("iteration_{}".format(iteration_number)):
      # Iteration step to use instead of global step.
      iteration_step = tf.get_variable(
          "step",
          shape=[],
          initializer=tf.zeros_initializer(),
          trainable=False,
          dtype=tf.int64)

      # Convert to tensor so that users cannot mutate it.
      iteration_step_tensor = tf.convert_to_tensor(iteration_step)

      seen_builder_names = {}
      candidates = []
      summaries = []
      subnetwork_reports = {}
      previous_ensemble = None

      if previous_ensemble_spec:
        previous_ensemble = previous_ensemble_spec.ensemble
        # Include previous best subnetwork as a candidate so that its
        # predictions are returned until a new candidate outperforms.
        seen_builder_names = {previous_ensemble_spec.name: True}
        previous_best_candidate = self._candidate_builder.build_candidate(
            ensemble_spec=previous_ensemble_spec,
            training=training,
            iteration_step=iteration_step_tensor,
            summary=previous_ensemble_summary,
            is_previous_best=True)
        candidates.append(previous_best_candidate)
        summaries.append(previous_ensemble_summary)

        # Generate subnetwork reports.
        if mode == tf.estimator.ModeKeys.EVAL:
          metrics = call_eval_metrics(previous_ensemble_spec.eval_metrics)
          subnetwork_report = subnetwork.Report(
              hparams={},
              attributes={},
              metrics=metrics,
          )
          subnetwork_report.metrics["adanet_loss"] = tf.metrics.mean(
              previous_ensemble_spec.adanet_loss)
          subnetwork_reports["previous_ensemble"] = subnetwork_report

      for subnetwork_builder in subnetwork_builders:
        if subnetwork_builder.name in seen_builder_names:
          raise ValueError("Two subnetworks have the same name '{}'".format(
              subnetwork_builder.name))
        seen_builder_names[subnetwork_builder.name] = True
      subnetwork_specs = []
      num_subnetworks = len(subnetwork_builders)
      for i, subnetwork_builder in enumerate(subnetwork_builders):
        if not self._placement_strategy.should_build_subnetwork(
            num_subnetworks, i) and not rebuilding:
          continue
        subnetwork_name = "t{}_{}".format(iteration_number,
                                          subnetwork_builder.name)
        subnetwork_summary = self._summary_maker(
            namespace="subnetwork",
            scope=subnetwork_name,
            skip_summary=skip_summaries or rebuilding)
        summaries.append(subnetwork_summary)
        tf.logging.info("%s subnetwork '%s'",
                        "Rebuilding" if rebuilding else "Building",
                        subnetwork_builder.name)
        subnetwork_spec = self._subnetwork_manager.build_subnetwork_spec(
            name=subnetwork_name,
            subnetwork_builder=subnetwork_builder,
            iteration_step=iteration_step_tensor,
            summary=subnetwork_summary,
            features=features,
            mode=builder_mode,
            labels=labels,
            previous_ensemble=previous_ensemble)
        subnetwork_specs.append(subnetwork_spec)
        if not self._placement_strategy.should_build_ensemble(
            num_subnetworks) and not rebuilding:
          # Workers that don't build ensembles need a dummy candidate in order
          # to train the subnetwork.
          # Because only ensembles can be considered candidates, we need to
          # convert the subnetwork into a dummy ensemble and subsequently a
          # dummy candidate. However, this dummy candidate is never considered a
          # true candidate during candidate evaluation and selection.
          # TODO: Eliminate need for candidates.
          dummy_candidate = self._candidate_builder.build_candidate(
              # pylint: disable=protected-access
              ensemble_spec=ensemble_builder_lib._EnsembleSpec(
                  name=subnetwork_name,
                  ensemble=None,
                  architecture=None,
                  subnetwork_builders=subnetwork_builders,
                  predictions=subnetwork_spec.predictions,
                  loss=subnetwork_spec.loss,
                  adanet_loss=0.),
              # pylint: enable=protected-access
              training=training,
              iteration_step=iteration_step_tensor,
              summary=subnetwork_summary,
              track_moving_average=False)
          candidates.append(dummy_candidate)
        # Generate subnetwork reports.
        if mode != tf.estimator.ModeKeys.PREDICT:
          subnetwork_report = subnetwork_builder.build_subnetwork_report()
          if not subnetwork_report:
            subnetwork_report = subnetwork.Report(
                hparams={}, attributes={}, metrics={})
          metrics = call_eval_metrics(subnetwork_spec.eval_metrics)
          for metric_name in sorted(metrics):
            metric = metrics[metric_name]
            subnetwork_report.metrics[metric_name] = metric
          subnetwork_reports[subnetwork_builder.name] = subnetwork_report

      # Create (ensemble_candidate*ensembler) ensembles.
      seen_ensemble_names = {}
      for ensembler in self._ensemblers:
        for ensemble_candidate in ensemble_candidates:
          if not self._placement_strategy.should_build_ensemble(
              num_subnetworks) and not rebuilding:
            continue
          ensemble_name = "t{}_{}_{}".format(
              iteration_number, ensemble_candidate.name, ensembler.name)
          if ensemble_name in seen_ensemble_names:
            raise ValueError(
                "Two ensembles have the same name '{}'".format(ensemble_name))
          seen_ensemble_names[ensemble_name] = True
          summary = self._summary_maker(
              namespace="ensemble",
              scope=ensemble_name,
              skip_summary=skip_summaries or rebuilding)
          summaries.append(summary)
          ensemble_spec = self._ensemble_builder.build_ensemble_spec(
              name=ensemble_name,
              candidate=ensemble_candidate,
              ensembler=ensembler,
              subnetwork_specs=subnetwork_specs,
              summary=summary,
              features=features,
              mode=builder_mode,
              iteration_step=iteration_step_tensor,
              iteration_number=iteration_number,
              labels=labels,
              previous_ensemble_spec=previous_ensemble_spec)
          # TODO: Eliminate need for candidates.
          # TODO: Don't track moving average of loss when rebuilding
          # previous ensemble.
          candidate = self._candidate_builder.build_candidate(
              ensemble_spec=ensemble_spec,
              training=training,
              iteration_step=iteration_step_tensor,
              summary=summary)
          candidates.append(candidate)
          # TODO: Move adanet_loss from subnetwork report to a new
          # ensemble report, since the adanet_loss is associated with an
          # ensemble and only exists when using a ComplexityRegularizedEnsembler.
          # Keep adanet_loss in subnetwork report for backwards compatibility.
          if len(ensemble_candidates) != len(subnetwork_builders):
            continue
          if len(ensemble_candidate.subnetwork_builders) > 1:
            continue
          if mode == tf.estimator.ModeKeys.PREDICT:
            continue
          builder_name = ensemble_candidate.subnetwork_builders[0].name
          subnetwork_reports[builder_name].metrics[
              "adanet_loss"] = tf.metrics.mean(ensemble_spec.adanet_loss)

      # Dynamically select the outputs of best candidate.
      best_candidate_index = self._best_candidate_index(candidates)
      best_predictions = self._best_predictions(candidates,
                                                best_candidate_index)
      best_loss = self._best_loss(candidates, best_candidate_index, mode)
      best_export_outputs = self._best_export_outputs(
          candidates, best_candidate_index, mode, best_predictions)
      # Hooks on TPU cannot depend on any graph `Tensors`. Instead the value of
      # `is_over` is stored in a `Variable` that can later be retrieved from
      # inside a training hook.
      is_over_var_template = tf.make_template("is_over_var_template",
                                              _is_over_var)
      training_chief_hooks, training_hooks = (), ()
      for subnetwork_spec in subnetwork_specs:
        if not self._placement_strategy.should_train_subnetworks(
            num_subnetworks) and not rebuilding:
          continue
        if not subnetwork_spec.train_op:
          continue
        training_chief_hooks += subnetwork_spec.train_op.chief_hooks or ()
        training_hooks += subnetwork_spec.train_op.hooks or ()
      for candidate in candidates:
        spec = candidate.ensemble_spec
        if not spec.train_op:
          continue
        training_chief_hooks += spec.train_op.chief_hooks or ()
        training_hooks += spec.train_op.hooks or ()
      summary = self._summary_maker(
          namespace=None, scope=None, skip_summary=skip_summaries or rebuilding)
      summaries.append(summary)
      with summary.current_scope():
        summary.scalar("iteration/adanet/iteration", iteration_number)
        summary.scalar("iteration_step/adanet/iteration_step",
                       iteration_step_tensor)
        if best_loss is not None:
          summary.scalar("loss", best_loss)
      train_op = self._create_train_op(subnetwork_specs, candidates, mode,
                                       iteration_step, is_over_var_template,
                                       num_subnetworks)
      iteration_metrics = _IterationMetrics(candidates, subnetwork_specs)
      if self._use_tpu:
        estimator_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=best_predictions,
            loss=best_loss,
            train_op=train_op,
            eval_metrics=iteration_metrics.best_eval_metrics_tuple(
                best_candidate_index, mode),
            export_outputs=best_export_outputs,
            training_hooks=training_hooks)
      else:
        estimator_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=best_predictions,
            loss=best_loss,
            train_op=train_op,
            eval_metric_ops=iteration_metrics.best_eval_metric_ops(
                best_candidate_index, mode),
            export_outputs=best_export_outputs,
            training_chief_hooks=training_chief_hooks,
            training_hooks=training_hooks)

      return _Iteration(
          number=iteration_number,
          candidates=candidates,
          subnetwork_specs=subnetwork_specs,
          estimator_spec=estimator_spec,
          best_candidate_index=best_candidate_index,
          summaries=summaries,
          is_over_fn=is_over_var_template,
          subnetwork_reports=subnetwork_reports,
          step=iteration_step_tensor)
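
For orientation, a hypothetical training-mode call; the builder instance and the candidate and builder lists are assumptions, not from the source:

iteration = iteration_builder.build_iteration(
    iteration_number=0,
    ensemble_candidates=ensemble_candidates,
    subnetwork_builders=[dnn_builder, linear_builder],
    features=features,
    mode=tf.estimator.ModeKeys.TRAIN,
    labels=labels)
estimator_spec = iteration.estimator_spec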
Example #5
  def build_iteration(self,
                      base_global_step,
                      iteration_number,
                      ensemble_candidates,
                      subnetwork_builders,
                      features,
                      mode,
                      config,
                      labels=None,
                      previous_ensemble_summary=None,
                      previous_ensemble_spec=None,
                      rebuilding=False,
                      rebuilding_ensembler_name=None,
                      best_ensemble_index_override=None):
    """Builds and returns AdaNet iteration t.

    This method uses the generated candidate subnetworks given the ensemble
    at iteration t-1 and creates graph operations to train them. The returned
    `_Iteration` tracks the training of all candidates to know when the
    iteration is over, and tracks the best candidate's predictions and loss, as
    defined by lowest complexity-regularized loss on the train set.

    Args:
      base_global_step: Integer global step at the beginning of this iteration.
      iteration_number: Integer iteration number.
      ensemble_candidates: Iterable of `adanet.ensemble.Candidate` instances.
      subnetwork_builders: A list of `Builders` for adding `Subnetworks` to the
        graph. Each subnetwork is then wrapped in a `_Candidate` to train.
      features: Dictionary of `Tensor` objects keyed by feature name.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      config: The `tf.estimator.RunConfig` to use this iteration.
      labels: `Tensor` of labels. Can be `None`.
      previous_ensemble_summary: The `adanet.Summary` for the previous ensemble.
      previous_ensemble_spec: Optional `_EnsembleSpec` for iteration t-1.
      rebuilding: Boolean whether the iteration is being rebuilt only to restore
        the previous best subnetworks and ensembles.
      rebuilding_ensembler_name: Optional ensembler name to restrict to; only
        relevant when rebuilding is True.
      best_ensemble_index_override: Integer index identifying the best ensemble
        candidate, used instead of computing the best ensemble index
        dynamically from the ensembles' AdaNet losses.

    Returns:
      An _Iteration instance.

    Raises:
      ValueError: If subnetwork_builders is empty.
      ValueError: If two subnetworks share the same name.
      ValueError: If two ensembles share the same name.
    """

    self._placement_strategy.config = config

    logging.info("%s iteration %s", "Rebuilding" if rebuilding else "Building",
                 iteration_number)

    if not subnetwork_builders:
      raise ValueError("Each iteration must have at least one Builder.")

    # TODO: Consider moving builder mode logic to ensemble_builder.py.
    builder_mode = mode
    if rebuilding:
      # Build the subnetworks and ensembles in EVAL mode by default. This way
      # their outputs aren't affected by dropout etc.
      builder_mode = tf.estimator.ModeKeys.EVAL
      if mode == tf.estimator.ModeKeys.PREDICT:
        builder_mode = mode

      # Only replicate in training mode when the user requests it.
      if self._replicate_ensemble_in_training and (
          mode == tf.estimator.ModeKeys.TRAIN):
        builder_mode = mode

    features, labels = self._check_numerics(features, labels)

    training = mode == tf.estimator.ModeKeys.TRAIN
    skip_summaries = mode == tf.estimator.ModeKeys.PREDICT or rebuilding
    with tf_compat.v1.variable_scope("iteration_{}".format(iteration_number)):
      seen_builder_names = {}
      candidates = []
      summaries = []
      subnetwork_reports = {}
      previous_ensemble = None

      if previous_ensemble_spec:
        previous_ensemble = previous_ensemble_spec.ensemble
        # Include previous best subnetwork as a candidate so that its
        # predictions are returned until a new candidate outperforms.
        seen_builder_names = {previous_ensemble_spec.name: True}
        previous_best_candidate = self._candidate_builder.build_candidate(
            ensemble_spec=previous_ensemble_spec,
            training=training,
            summary=previous_ensemble_summary)
        candidates.append(previous_best_candidate)
        if self._enable_ensemble_summaries:
          summaries.append(previous_ensemble_summary)

        # Generate subnetwork reports.
        if mode == tf.estimator.ModeKeys.EVAL:
          metrics = call_eval_metrics(previous_ensemble_spec.eval_metrics)
          subnetwork_report = subnetwork.Report(
              hparams={},
              attributes={},
              metrics=metrics,
          )
          subnetwork_report.metrics["adanet_loss"] = tf_compat.v1.metrics.mean(
              previous_ensemble_spec.adanet_loss)
          subnetwork_reports["previous_ensemble"] = subnetwork_report

      for subnetwork_builder in subnetwork_builders:
        if subnetwork_builder.name in seen_builder_names:
          raise ValueError("Two subnetworks have the same name '{}'".format(
              subnetwork_builder.name))
        seen_builder_names[subnetwork_builder.name] = True
      subnetwork_specs = []
      num_subnetworks = len(subnetwork_builders)
      skip_summary = skip_summaries or not self._enable_subnetwork_summaries
      for i, subnetwork_builder in enumerate(subnetwork_builders):
        if not self._placement_strategy.should_build_subnetwork(
            num_subnetworks, i) and not rebuilding:
          continue
        with self._placement_strategy.subnetwork_devices(num_subnetworks, i):
          subnetwork_name = "t{}_{}".format(iteration_number,
                                            subnetwork_builder.name)
          subnetwork_summary = self._summary_maker(
              namespace="subnetwork",
              scope=subnetwork_name,
              skip_summary=skip_summary)
          if not skip_summary:
            summaries.append(subnetwork_summary)
          logging.info("%s subnetwork '%s'",
                       "Rebuilding" if rebuilding else "Building",
                       subnetwork_builder.name)
          subnetwork_spec = self._subnetwork_manager.build_subnetwork_spec(
              name=subnetwork_name,
              subnetwork_builder=subnetwork_builder,
              summary=subnetwork_summary,
              features=features,
              mode=builder_mode,
              labels=labels,
              previous_ensemble=previous_ensemble)
          subnetwork_specs.append(subnetwork_spec)
          # Workers that don't build ensembles need a dummy candidate in order
          # to train the subnetwork.
          # Because only ensembles can be considered candidates, we need to
          # convert the subnetwork into a dummy ensemble and subsequently a
          # dummy candidate. However, this dummy candidate is never considered a
          # true candidate during candidate evaluation and selection.
          # TODO: Eliminate need for candidates.
          if not self._placement_strategy.should_build_ensemble(
              num_subnetworks) and not rebuilding:
            candidates.append(
                self._create_dummy_candidate(subnetwork_spec,
                                             subnetwork_builders,
                                             subnetwork_summary, training))
        # Generate subnetwork reports.
        if mode != tf.estimator.ModeKeys.PREDICT:
          subnetwork_report = subnetwork_builder.build_subnetwork_report()
          if not subnetwork_report:
            subnetwork_report = subnetwork.Report(
                hparams={}, attributes={}, metrics={})
          metrics = call_eval_metrics(subnetwork_spec.eval_metrics)
          for metric_name in sorted(metrics):
            metric = metrics[metric_name]
            subnetwork_report.metrics[metric_name] = metric
          subnetwork_reports[subnetwork_builder.name] = subnetwork_report

      # Create (ensemble_candidate*ensembler) ensembles.
      skip_summary = skip_summaries or not self._enable_ensemble_summaries
      seen_ensemble_names = {}
      for ensembler in self._ensemblers:
        if rebuilding and rebuilding_ensembler_name and (
            ensembler.name != rebuilding_ensembler_name):
          continue
        for ensemble_candidate in ensemble_candidates:
          if not self._placement_strategy.should_build_ensemble(
              num_subnetworks) and not rebuilding:
            continue
          ensemble_name = "t{}_{}_{}".format(iteration_number,
                                             ensemble_candidate.name,
                                             ensembler.name)
          if ensemble_name in seen_ensemble_names:
            raise ValueError(
                "Two ensembles have the same name '{}'".format(ensemble_name))
          seen_ensemble_names[ensemble_name] = True
          summary = self._summary_maker(
              namespace="ensemble",
              scope=ensemble_name,
              skip_summary=skip_summary)
          if not skip_summary:
            summaries.append(summary)
          ensemble_spec = self._ensemble_builder.build_ensemble_spec(
              name=ensemble_name,
              candidate=ensemble_candidate,
              ensembler=ensembler,
              subnetwork_specs=subnetwork_specs,
              summary=summary,
              features=features,
              mode=builder_mode,
              iteration_number=iteration_number,
              labels=labels,
              previous_ensemble_spec=previous_ensemble_spec)
          # TODO: Eliminate need for candidates.
          # TODO: Don't track moving average of loss when rebuilding
          # previous ensemble.
          candidate = self._candidate_builder.build_candidate(
              ensemble_spec=ensemble_spec, training=training, summary=summary)
          candidates.append(candidate)
          # TODO: Move adanet_loss from subnetwork report to a new
          # ensemble report, since the adanet_loss is associated with an
          # ensemble and only exists when using a ComplexityRegularizedEnsembler.
          # Keep adanet_loss in subnetwork report for backwards compatibility.
          if len(ensemble_candidates) != len(subnetwork_builders):
            continue
          if len(ensemble_candidate.subnetwork_builders) > 1:
            continue
          if mode == tf.estimator.ModeKeys.PREDICT:
            continue
          builder_name = ensemble_candidate.subnetwork_builders[0].name
          subnetwork_reports[builder_name].metrics[
              "adanet_loss"] = tf_compat.v1.metrics.mean(
                  ensemble_spec.adanet_loss)

      # Dynamically select the outputs of best candidate.
      best_candidate_index = self._best_candidate_index(
          candidates, best_ensemble_index_override)
      best_predictions = self._best_predictions(candidates,
                                                best_candidate_index)
      best_loss = self._best_loss(candidates, best_candidate_index, mode)
      best_export_outputs = self._best_export_outputs(candidates,
                                                      best_candidate_index,
                                                      mode, best_predictions)
      train_manager_dir = os.path.join(config.model_dir, "train_manager",
                                       "t{}".format(iteration_number))
      train_manager, training_chief_hooks, training_hooks = self._create_hooks(
          base_global_step, subnetwork_specs, candidates, num_subnetworks,
          rebuilding, train_manager_dir, config.is_chief)
      # Iteration summaries.
      summary = self._summary_maker(
          namespace=None, scope=None, skip_summary=skip_summaries)
      summaries.append(summary)
      with summary.current_scope():
        summary.scalar("iteration/adanet/iteration", iteration_number)
        if best_loss is not None:
          summary.scalar("loss", best_loss)
      iteration_metrics = _IterationMetrics(iteration_number, candidates,
                                            subnetwork_specs)
      # All training happens in hooks so we don't need a train op.
      train_op = tf.no_op()
      if self._use_tpu:
        estimator_spec = tf_compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=best_predictions,
            loss=best_loss,
            train_op=self._create_tpu_train_op(base_global_step,
                                               subnetwork_specs, candidates,
                                               mode, num_subnetworks, config),
            eval_metrics=iteration_metrics.best_eval_metrics_tuple(
                best_candidate_index, mode),
            export_outputs=best_export_outputs,
            training_hooks=training_hooks)
      else:
        estimator_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=best_predictions,
            loss=best_loss,
            train_op=train_op,
            eval_metric_ops=iteration_metrics.best_eval_metric_ops(
                best_candidate_index, mode),
            export_outputs=best_export_outputs,
            training_chief_hooks=training_chief_hooks,
            training_hooks=training_hooks)

      return _Iteration(
          number=iteration_number,
          candidates=candidates,
          subnetwork_specs=subnetwork_specs,
          estimator_spec=estimator_spec,
          best_candidate_index=best_candidate_index,
          summaries=summaries,
          train_manager=train_manager,
          subnetwork_reports=subnetwork_reports)
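
This later revision threads the starting global step and the run config through explicitly. A hypothetical call, with all instance names assumed:

iteration = iteration_builder.build_iteration(
    base_global_step=0,
    iteration_number=1,
    ensemble_candidates=ensemble_candidates,
    subnetwork_builders=[dnn_builder, linear_builder],
    features=features,
    mode=tf.estimator.ModeKeys.TRAIN,
    config=tf.estimator.RunConfig(model_dir="/tmp/adanet"),
    labels=labels,
    best_ensemble_index_override=None)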