def _create_dummy_candidate(self, subnetwork_spec, subnetwork_builders,
                            subnetwork_summary, training):
  """Wraps a `SubnetworkSpec` in a throwaway `_Candidate`.

  AdaNet only considers ensembles as candidate models, and ensembles are
  represented as `_Candidates`. When training only subnetworks, such as on a
  subnetwork-worker in the RoundRobinStrategy, a candidate is still needed to
  manage the subnetwork's training even though it is ultimately discarded —
  hence this dummy candidate.

  Args:
    subnetwork_spec: The subnetwork spec for the dummy candidate to wrap.
    subnetwork_builders: List of all subnetwork builders generated this
      iteration.
    subnetwork_summary: `_Summary` object to use for TensorBoard.
    training: Whether or not we are currently training.

  Returns:
    A `_Candidate` wrapping a placeholder `_EnsembleSpec`.
  """
  # Placeholder ensemble: no real ensemble/architecture, zero adanet_loss so
  # it never wins candidate selection.
  placeholder_spec = _EnsembleSpec(
      name="dummy_{}".format(subnetwork_spec.name),
      ensemble=None,
      architecture=None,
      subnetwork_builders=subnetwork_builders,
      predictions=subnetwork_spec.predictions,
      loss=subnetwork_spec.loss,
      step=None,
      adanet_loss=0.)
  return self._candidate_builder.build_candidate(
      ensemble_spec=placeholder_spec,
      training=training,
      summary=subnetwork_summary,
      track_moving_average=False)
def append_new_subnetwork(self, ensemble_name, ensemble_spec,
                          subnetwork_builder, iteration_number, iteration_step,
                          summary, features, mode, labels, params):
  """Test fake: ignores most inputs and returns a fixed `_EnsembleSpec`."""
  # Only `features`, `mode`, and `labels` feed the head; the remaining
  # arguments exist purely to satisfy the interface.
  del ensemble_name, ensemble_spec, subnetwork_builder
  del iteration_number, iteration_step, summary, params
  estimator_spec = self._head.create_estimator_spec(
      features=features, mode=mode, labels=labels, logits=[[.5]])
  return _EnsembleSpec(
      name="test",
      ensemble=None,
      architecture=None,
      predictions=estimator_spec.predictions,
      loss=None,
      adanet_loss=.1,
      subnetwork_train_op=None,
      ensemble_train_op=None,
      eval_metrics=None,
      export_outputs=estimator_spec.export_outputs)
def test_iteration_metrics(self, use_tpu, mode):
  """Checks the best candidate's eval metrics are surfaced in EVAL mode."""
  best_candidate_index = 3
  candidates = []
  for i in range(10):

    def metric_fn(val=i):  # Default arg pins the loop value of `i`.
      return {"ensemble_metric": tf.metrics.mean(tf.constant(val))}

    candidates.append(
        _Candidate(
            ensemble_spec=_EnsembleSpec(
                name="ensemble_{}".format(i),
                ensemble=None,
                architecture=None,
                subnetwork_builders=None,
                predictions=None,
                eval_metrics=(metric_fn, {})),
            adanet_loss=tf.constant(i),
            is_training=tf.constant(False)))
  metrics = _IterationMetrics(candidates, subnetwork_specs=[])
  with self.test_session() as sess:
    # TPU path returns a (metric_fn, args) tuple; CPU path returns metric ops.
    metrics_fn = (
        metrics.best_eval_metrics_tuple
        if use_tpu else metrics.best_eval_metric_ops)
    actual = _run_metrics(
        sess,
        metrics_fn(tf.constant(best_candidate_index), mode) or {})
    if mode == tf.estimator.ModeKeys.EVAL:
      expected = {"ensemble_metric": best_candidate_index}
    else:
      expected = {}
    self.assertEqual(actual, expected)
def build_ensemble_spec(self, name, candidate, ensembler, subnetwork_specs,
                        summary, features, mode, iteration_number, labels=None,
                        previous_ensemble_spec=None, params=None):
  """Test fake: returns a minimal `_EnsembleSpec` backed by the head."""
  # Everything except `name`, `candidate`, `features`, `mode`, and `labels`
  # is unused by this fake.
  del ensembler, subnetwork_specs, summary, iteration_number
  del previous_ensemble_spec, params
  estimator_spec = self._head.create_estimator_spec(
      features=features, mode=mode, labels=labels, logits=[[.5]])
  return _EnsembleSpec(
      name=name,
      ensemble=None,
      architecture=None,
      subnetwork_builders=candidate.subnetwork_builders,
      predictions=estimator_spec.predictions,
      step=tf.Variable(0),
      loss=None,
      adanet_loss=.1,
      train_op=None,
      eval_metrics=None,
      export_outputs=estimator_spec.export_outputs)
def create_iteration_metrics(subnetwork_metrics=None,
                             ensemble_metrics=None,
                             use_tpu=False,
                             iteration_number=1):
  """Creates an instance of the _IterationMetrics class.

  Args:
    subnetwork_metrics: List of _SubnetworkMetrics objects.
    ensemble_metrics: List of _EnsembleMetrics objects.
    use_tpu: Whether to use TPU-specific variable sharing logic.
    iteration_number: What number iteration these metrics are for.

  Returns:
    An instance of _IterationMetrics that has been populated with the input
    metrics.
  """
  # Wrap each ensemble metric in a skeleton candidate so _IterationMetrics
  # can consume it; all non-metric fields are deliberately None.
  candidates = [
      _Candidate(
          ensemble_spec=_EnsembleSpec(
              name="ensemble_{}".format(i),
              ensemble=None,
              architecture=None,
              subnetwork_builders=None,
              predictions=None,
              step=None,
              variables=None,
              eval_metrics=metric),
          adanet_loss=tf.constant(i),
          variables=None) for i, metric in enumerate(ensemble_metrics or [])
  ]
  # Likewise, wrap each subnetwork metric in a skeleton spec.
  subnetwork_specs = [
      _SubnetworkSpec(
          name="subnetwork_{}".format(i),
          subnetwork=None,
          builder=None,
          predictions=None,
          step=None,
          loss=None,
          train_op=None,
          asset_dir=None,
          eval_metrics=metric,
          variables=None) for i, metric in enumerate(subnetwork_metrics or [])
  ]
  return _IterationMetrics(
      iteration_number,
      candidates,
      subnetwork_specs=subnetwork_specs,
      use_tpu=use_tpu)
def test_iteration_metrics(self, use_tpu, mode):
  """Verifies the best candidate's v1 and Keras metrics are both reported."""
  with context.graph_mode():
    self.setup_graph()
    best_candidate_index = 3
    candidates = []
    for i in range(10):

      def metric_fn(val=i):  # Default arg pins the loop value of `i`.
        metric = tf.keras.metrics.Mean()
        metric.update_state(tf.constant(val))
        return {
            "ensemble_v1_metric": tf_compat.v1.metrics.mean(tf.constant(val)),
            "ensemble_keras_metric": metric
        }

      candidates.append(
          _Candidate(
              ensemble_spec=_EnsembleSpec(
                  name="ensemble_{}".format(i),
                  ensemble=None,
                  architecture=None,
                  subnetwork_builders=None,
                  predictions=None,
                  step=None,
                  eval_metrics=(metric_fn, {})),
              adanet_loss=tf.constant(i)))
    metrics = _IterationMetrics(1, candidates, subnetwork_specs=[])
    # TPU path returns a (metric_fn, args) tuple; CPU path returns metric ops.
    metrics_fn = (
        metrics.best_eval_metrics_tuple
        if use_tpu else metrics.best_eval_metric_ops)
    actual = self._run_metrics(
        metrics_fn(tf.constant(best_candidate_index), mode) or {})
    if mode == tf.estimator.ModeKeys.EVAL:
      expected = {
          "ensemble_v1_metric": best_candidate_index,
          "ensemble_keras_metric": best_candidate_index,
          "iteration": 1
      }
    else:
      expected = {}
    self.assertEqual(actual, expected)
def build_ensemble_spec(self, name, candidate, ensembler, subnetwork_specs,
                        summary, features, mode, iteration_number, labels=None,
                        previous_ensemble_spec=None, my_ensemble_index=None,
                        params=None, previous_iteration_checkpoint=None):
  """Test fake: returns a canned `_EnsembleSpec` built from the head."""
  # Only `name`, `candidate`, `features`, `mode`, and `labels` are consumed;
  # discard the rest explicitly.
  del ensembler, subnetwork_specs, summary, iteration_number
  del previous_ensemble_spec, my_ensemble_index, params
  del previous_iteration_checkpoint
  estimator_spec = self._head.create_estimator_spec(
      features=features, mode=mode, labels=labels, logits=[[.5]])
  return _EnsembleSpec(
      name=name,
      ensemble=None,
      architecture=_Architecture("foo", "bar"),
      subnetwork_builders=candidate.subnetwork_builders,
      predictions=estimator_spec.predictions,
      step=tf.Variable(0, dtype=tf.int64),
      variables=[tf.Variable(1.)],
      loss=None,
      adanet_loss=.1,
      train_op=None,
      eval_metrics=None,
      export_outputs=estimator_spec.export_outputs)
def dummy_ensemble_spec(name,
                        random_seed=42,
                        num_subnetworks=1,
                        bias=0.,
                        loss=None,
                        adanet_loss=None,
                        eval_metrics=None,
                        dict_predictions=False,
                        export_output_key=None,
                        subnetwork_builders=None,
                        train_op=None):
  """Builds a fake `_EnsembleSpec` instance for tests.

  Args:
    name: _EnsembleSpec's name.
    random_seed: A scalar random seed.
    num_subnetworks: The number of fake subnetworks in this ensemble.
    bias: Bias value.
    loss: Float loss to return. When None, it's picked from a random
      distribution.
    adanet_loss: Float AdaNet loss to return. When None, it's picked from a
      random distribution.
    eval_metrics: Optional eval metrics tuple of (metric_fn, tensor args).
    dict_predictions: Boolean whether to return predictions as a dictionary of
      `Tensor` or just a single float `Tensor`.
    export_output_key: An `ExportOutputKeys` for faking export outputs.
    subnetwork_builders: List of `adanet.subnetwork.Builder` objects.
    train_op: A train op.

  Returns:
    A dummy `_EnsembleSpec` instance.
  """
  if loss is None:
    loss = dummy_tensor([], random_seed)

  # Unlike `loss`, a caller-supplied adanet_loss is normalized to a `Tensor`.
  if adanet_loss is None:
    adanet_loss = dummy_tensor([], random_seed * 2)
  else:
    adanet_loss = tf.convert_to_tensor(adanet_loss)

  logits = dummy_tensor([], random_seed * 3)
  if dict_predictions:
    predictions = {
        "logits": logits,
        "classes": tf.cast(tf.abs(logits), dtype=tf.int64)
    }
  else:
    predictions = logits

  # One fake weighted subnetwork, replicated `num_subnetworks` times when the
  # ensemble is assembled below.
  weighted_subnetworks = [
      WeightedSubnetwork(
          name=name,
          iteration_number=1,
          logits=dummy_tensor([2, 1], random_seed * 4),
          weight=dummy_tensor([2, 1], random_seed * 4),
          subnetwork=Subnetwork(
              last_layer=dummy_tensor([1, 2], random_seed * 4),
              logits=dummy_tensor([2, 1], random_seed * 4),
              complexity=1.,
              persisted_tensors={}))
  ]
  export_outputs = _dummy_export_outputs(export_output_key, logits,
                                         predictions)
  return _EnsembleSpec(
      name=name,
      ensemble=ComplexityRegularized(
          weighted_subnetworks=weighted_subnetworks * num_subnetworks,
          bias=tf.constant(bias),
          logits=logits,
      ),
      architecture=_Architecture("dummy_ensemble_candidate"),
      subnetwork_builders=subnetwork_builders,
      predictions=predictions,
      loss=loss,
      adanet_loss=adanet_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      export_outputs=export_outputs)
def build_iteration(self,
                    iteration_number,
                    ensemble_candidates,
                    subnetwork_builders,
                    features,
                    mode,
                    labels=None,
                    previous_ensemble_summary=None,
                    previous_ensemble_spec=None,
                    skip_summaries=False,
                    rebuilding=False):
  """Builds and returns AdaNet iteration t.

  This method uses the generated the candidate subnetworks given the ensemble
  at iteration t-1 and creates graph operations to train them.

  The returned `_Iteration` tracks the training of all candidates to know
  when the iteration is over, and tracks the best candidate's predictions and
  loss, as defined by lowest complexity-regularized loss on the train set.

  Args:
    iteration_number: Integer iteration number.
    ensemble_candidates: Iterable of `adanet.ensemble.Candidate` instances.
    subnetwork_builders: A list of `Builders` for adding `Subnetworks` to the
      graph. Each subnetwork is then wrapped in a `_Candidate` to train.
    features: Dictionary of `Tensor` objects keyed by feature name.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    labels: `Tensor` of labels. Can be `None`.
    previous_ensemble_summary: The `adanet.Summary` for the previous ensemble.
    previous_ensemble_spec: Optional `_EnsembleSpec` for iteration t-1.
    skip_summaries: Whether to skip creating the summary ops when building the
      `_Iteration`.
    rebuilding: Boolean whether the iteration is being rebuilt only to restore
      the previous best subnetworks and ensembles.

  Returns:
    An _Iteration instance.

  Raises:
    ValueError: If subnetwork_builders is empty.
    ValueError: If two subnetworks share the same name.
    ValueError: If two ensembles share the same name.
  """
  tf.logging.info("%s iteration %s",
                  "Rebuilding" if rebuilding else "Building",
                  iteration_number)

  if not subnetwork_builders:
    raise ValueError("Each iteration must have at least one Builder.")

  # TODO: Consider moving builder mode logic to ensemble_builder.py.
  builder_mode = mode
  if rebuilding:
    # Build the subnetworks and ensembles in EVAL mode by default. This way
    # their outputs aren't affected by dropout etc.
    builder_mode = tf.estimator.ModeKeys.EVAL
    if mode == tf.estimator.ModeKeys.PREDICT:
      builder_mode = mode

    # Only replicate in training mode when the user requests it.
    if self._replicate_ensemble_in_training and (
        mode == tf.estimator.ModeKeys.TRAIN):
      builder_mode = mode

  features, labels = self._check_numerics(features, labels)

  training = mode == tf.estimator.ModeKeys.TRAIN
  # NOTE(review): this overwrites the `skip_summaries` parameter, so the
  # caller-supplied value is never read — confirm whether that is intended.
  skip_summaries = mode == tf.estimator.ModeKeys.PREDICT
  with tf.variable_scope("iteration_{}".format(iteration_number)):
    # Iteration step to use instead of global step.
    iteration_step = tf.get_variable(
        "step",
        shape=[],
        initializer=tf.zeros_initializer(),
        trainable=False,
        dtype=tf.int64)

    # Convert to tensor so that users cannot mutate it.
    iteration_step_tensor = tf.convert_to_tensor(iteration_step)

    seen_builder_names = {}
    candidates = []
    summaries = []
    subnetwork_reports = {}
    previous_ensemble = None

    if previous_ensemble_spec:
      previous_ensemble = previous_ensemble_spec.ensemble
      # Include previous best subnetwork as a candidate so that its
      # predictions are returned until a new candidate outperforms.
      seen_builder_names = {previous_ensemble_spec.name: True}
      previous_best_candidate = self._candidate_builder.build_candidate(
          ensemble_spec=previous_ensemble_spec,
          training=training,
          iteration_step=iteration_step_tensor,
          summary=previous_ensemble_summary,
          is_previous_best=True)
      candidates.append(previous_best_candidate)
      summaries.append(previous_ensemble_summary)

      # Generate subnetwork reports.
      if mode == tf.estimator.ModeKeys.EVAL:
        metrics = call_eval_metrics(previous_ensemble_spec.eval_metrics)
        subnetwork_report = subnetwork.Report(
            hparams={},
            attributes={},
            metrics=metrics,
        )
        subnetwork_report.metrics["adanet_loss"] = tf.metrics.mean(
            previous_ensemble_spec.adanet_loss)
        subnetwork_reports["previous_ensemble"] = subnetwork_report

    for subnetwork_builder in subnetwork_builders:
      if subnetwork_builder.name in seen_builder_names:
        raise ValueError("Two subnetworks have the same name '{}'".format(
            subnetwork_builder.name))
      seen_builder_names[subnetwork_builder.name] = True

    subnetwork_specs = []
    num_subnetworks = len(subnetwork_builders)
    for i, subnetwork_builder in enumerate(subnetwork_builders):
      if not self._placement_strategy.should_build_subnetwork(
          num_subnetworks, i) and not rebuilding:
        continue
      subnetwork_name = "t{}_{}".format(iteration_number,
                                        subnetwork_builder.name)
      subnetwork_summary = self._summary_maker(
          namespace="subnetwork",
          scope=subnetwork_name,
          skip_summary=skip_summaries or rebuilding)
      summaries.append(subnetwork_summary)
      tf.logging.info("%s subnetwork '%s'",
                      "Rebuilding" if rebuilding else "Building",
                      subnetwork_builder.name)
      subnetwork_spec = self._subnetwork_manager.build_subnetwork_spec(
          name=subnetwork_name,
          subnetwork_builder=subnetwork_builder,
          iteration_step=iteration_step_tensor,
          summary=subnetwork_summary,
          features=features,
          mode=builder_mode,
          labels=labels,
          previous_ensemble=previous_ensemble)
      subnetwork_specs.append(subnetwork_spec)
      if not self._placement_strategy.should_build_ensemble(
          num_subnetworks) and not rebuilding:
        # Workers that don't build ensembles need a dummy candidate in order
        # to train the subnetwork.
        # Because only ensembles can be considered candidates, we need to
        # convert the subnetwork into a dummy ensemble and subsequently a
        # dummy candidate. However, this dummy candidate is never considered a
        # true candidate during candidate evaluation and selection.
        # TODO: Eliminate need for candidates.
        dummy_candidate = self._candidate_builder.build_candidate(
            # pylint: disable=protected-access
            ensemble_spec=ensemble_builder_lib._EnsembleSpec(
                name=subnetwork_name,
                ensemble=None,
                architecture=None,
                subnetwork_builders=subnetwork_builders,
                predictions=subnetwork_spec.predictions,
                loss=subnetwork_spec.loss,
                adanet_loss=0.),
            # pylint: enable=protected-access
            training=training,
            iteration_step=iteration_step_tensor,
            summary=subnetwork_summary,
            track_moving_average=False)
        candidates.append(dummy_candidate)
      # Generate subnetwork reports.
      if mode != tf.estimator.ModeKeys.PREDICT:
        subnetwork_report = subnetwork_builder.build_subnetwork_report()
        if not subnetwork_report:
          subnetwork_report = subnetwork.Report(
              hparams={}, attributes={}, metrics={})
        metrics = call_eval_metrics(subnetwork_spec.eval_metrics)
        for metric_name in sorted(metrics):
          metric = metrics[metric_name]
          subnetwork_report.metrics[metric_name] = metric
        subnetwork_reports[subnetwork_builder.name] = subnetwork_report

    # Create (ensembler_candidate*ensembler) ensembles.
    seen_ensemble_names = {}
    for ensembler in self._ensemblers:
      for ensemble_candidate in ensemble_candidates:
        if not self._placement_strategy.should_build_ensemble(
            num_subnetworks) and not rebuilding:
          continue
        ensemble_name = "t{}_{}_{}".format(iteration_number,
                                           ensemble_candidate.name,
                                           ensembler.name)
        if ensemble_name in seen_ensemble_names:
          raise ValueError(
              "Two ensembles have the same name '{}'".format(ensemble_name))
        seen_ensemble_names[ensemble_name] = True
        summary = self._summary_maker(
            namespace="ensemble",
            scope=ensemble_name,
            skip_summary=skip_summaries or rebuilding)
        summaries.append(summary)
        ensemble_spec = self._ensemble_builder.build_ensemble_spec(
            name=ensemble_name,
            candidate=ensemble_candidate,
            ensembler=ensembler,
            subnetwork_specs=subnetwork_specs,
            summary=summary,
            features=features,
            mode=builder_mode,
            iteration_step=iteration_step_tensor,
            iteration_number=iteration_number,
            labels=labels,
            previous_ensemble_spec=previous_ensemble_spec)
        # TODO: Eliminate need for candidates.
        # TODO: Don't track moving average of loss when rebuilding
        # previous ensemble.
        candidate = self._candidate_builder.build_candidate(
            ensemble_spec=ensemble_spec,
            training=training,
            iteration_step=iteration_step_tensor,
            summary=summary)
        candidates.append(candidate)
        # TODO: Move adanet_loss from subnetwork report to a new
        # ensemble report, since the adanet_loss is associated with an
        # ensemble, and only when using a ComplexityRegularizedEnsemblers.
        # Keep adanet_loss in subnetwork report for backwards compatibility.
        if len(ensemble_candidates) != len(subnetwork_builders):
          continue
        if len(ensemble_candidate.subnetwork_builders) > 1:
          continue
        if mode == tf.estimator.ModeKeys.PREDICT:
          continue
        builder_name = ensemble_candidate.subnetwork_builders[0].name
        subnetwork_reports[builder_name].metrics[
            "adanet_loss"] = tf.metrics.mean(ensemble_spec.adanet_loss)

    # Dynamically select the outputs of best candidate.
    best_candidate_index = self._best_candidate_index(candidates)
    best_predictions = self._best_predictions(candidates,
                                              best_candidate_index)
    best_loss = self._best_loss(candidates, best_candidate_index, mode)
    best_export_outputs = self._best_export_outputs(candidates,
                                                    best_candidate_index,
                                                    mode, best_predictions)
    # Hooks on TPU cannot depend on any graph `Tensors`. Instead the value of
    # `is_over` is stored in a `Variable` that can later be retrieved from
    # inside a training hook.
    is_over_var_template = tf.make_template("is_over_var_template",
                                            _is_over_var)

    training_chief_hooks, training_hooks = (), ()
    for subnetwork_spec in subnetwork_specs:
      if not self._placement_strategy.should_train_subnetworks(
          num_subnetworks) and not rebuilding:
        continue
      if not subnetwork_spec.train_op:
        continue
      training_chief_hooks += subnetwork_spec.train_op.chief_hooks or ()
      training_hooks += subnetwork_spec.train_op.hooks or ()
    for candidate in candidates:
      spec = candidate.ensemble_spec
      if not spec.train_op:
        continue
      training_chief_hooks += spec.train_op.chief_hooks or ()
      training_hooks += spec.train_op.hooks or ()

    summary = self._summary_maker(
        namespace=None, scope=None, skip_summary=skip_summaries or rebuilding)
    summaries.append(summary)
    with summary.current_scope():
      summary.scalar("iteration/adanet/iteration", iteration_number)
      summary.scalar("iteration_step/adanet/iteration_step",
                     iteration_step_tensor)
      if best_loss is not None:
        summary.scalar("loss", best_loss)
    train_op = self._create_train_op(subnetwork_specs, candidates, mode,
                                     iteration_step, is_over_var_template,
                                     num_subnetworks)
    iteration_metrics = _IterationMetrics(candidates, subnetwork_specs)
    if self._use_tpu:
      estimator_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions=best_predictions,
          loss=best_loss,
          train_op=train_op,
          eval_metrics=iteration_metrics.best_eval_metrics_tuple(
              best_candidate_index, mode),
          export_outputs=best_export_outputs,
          training_hooks=training_hooks)
    else:
      estimator_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=best_predictions,
          loss=best_loss,
          train_op=train_op,
          eval_metric_ops=iteration_metrics.best_eval_metric_ops(
              best_candidate_index, mode),
          export_outputs=best_export_outputs,
          training_chief_hooks=training_chief_hooks,
          training_hooks=training_hooks)

    return _Iteration(
        number=iteration_number,
        candidates=candidates,
        subnetwork_specs=subnetwork_specs,
        estimator_spec=estimator_spec,
        best_candidate_index=best_candidate_index,
        summaries=summaries,
        is_over_fn=is_over_var_template,
        subnetwork_reports=subnetwork_reports,
        step=iteration_step_tensor)