Example #1
            def metric_fn(loss_value, label_ids, log_probs):
                loss = tf.compat.v1.metrics.mean(values=loss_value)

                predictions = tf.argmax(log_probs,
                                        axis=-1,
                                        output_type=tf.int32)
                accuracy = tf.compat.v1.metrics.accuracy(
                    labels=label_ids, predictions=predictions)
                p1, p1_op = tf.compat.v1.metrics.precision_at_k(
                    labels=tf.cast(label_ids, tf.int64),
                    predictions=log_probs,
                    k=1)
                r1, r1_op = tf.compat.v1.metrics.recall_at_k(
                    labels=tf.cast(label_ids, tf.int64),
                    predictions=log_probs,
                    k=1)
                f11 = tf.math.divide_no_nan(2 * p1 * r1, p1 + r1)

                metric_dict = {
                    "P@1": (p1, p1_op),
                    "R@1": (r1, r1_op),
                    "f1@1": (f11, tf.no_op()),
                    "classification_accuracy": accuracy,
                    "classification_loss": loss,
                }

                return metric_dict
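Each entry in metric_dict above is a (value_tensor, update_op) pair, which is the contract Estimator-style eval metrics expect. A minimal, self-contained sketch (toy values, not part of the original example) of how these streaming metrics behave in TF1-style graph mode:

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()
    label_ids = tf.constant([0, 1, 1])
    log_probs = tf.constant([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
    p1, p1_op = tf.compat.v1.metrics.precision_at_k(
        labels=tf.cast(label_ids, tf.int64), predictions=log_probs, k=1)
    r1, r1_op = tf.compat.v1.metrics.recall_at_k(
        labels=tf.cast(label_ids, tf.int64), predictions=log_probs, k=1)
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.local_variables_initializer())
        sess.run([p1_op, r1_op])   # run the update ops to accumulate statistics
        print(sess.run([p1, r1]))  # then read the accumulated metric values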
Example #2
def assert_shape_equal(shape_a, shape_b):
    """Asserts that shape_a and shape_b are equal.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf.errors.InvalidArgumentError when the
  shapes mismatch.

  Args:
    shape_a: a list containing the shape of the first tensor.
    shape_b: a list containing the shape of the second tensor.

  Returns:
    Either a tf.no_op() when the shapes are all static, or a tf.assert_equal()
    op when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
    if (all(isinstance(dim, int) for dim in shape_a)
            and all(isinstance(dim, int) for dim in shape_b)):
        if shape_a != shape_b:
            raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
        else:
            return tf.no_op()
    else:
        return tf.assert_equal(shape_a, shape_b)
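A short usage sketch of the function above (illustrative only; assumes assert_shape_equal is in scope and TF2 eager execution, where the returned ops run immediately):

    import tensorflow as tf

    assert_shape_equal([2, 3], [2, 3])        # all static and equal -> tf.no_op()
    try:
        assert_shape_equal([2, 3], [2, 4])    # all static but unequal -> ValueError
    except ValueError as e:
        print(e)

    batch = tf.constant(2)                    # a dynamic (tensor-valued) dimension
    assert_shape_equal([batch, 3], [2, 3])    # falls through to tf.assert_equal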
Example #3
  def _apply_gradients_cross_replica(self, distribution, grads_and_vars, name,
                                     experimental_aggregate_gradients):
    grads = [g for g, _ in grads_and_vars]
    if isinstance(self._loss_scale, _DynamicLossScaleState):
      loss_scale_update_op, should_apply_grads = self._loss_scale.update(grads)
    else:
      loss_scale_update_op = tf.no_op()
      should_apply_grads = True

    def apply_fn():
      # We do not want DistributionStrategy to unwrap any MirroredVariables in
      # grads_and_vars, because even in a replica context, the wrapped optimizer
      # expects mirrored variables. So we wrap the variables with an
      # _UnwrapPreventer, preventing DistributionStrategy from unwrapping the
      # MirroredVariables.
      wrapped_vars = _UnwrapPreventer([v for _, v in grads_and_vars])
      return distribution.extended.call_for_each_replica(
          self._apply_gradients,
          args=(grads, wrapped_vars, name, experimental_aggregate_gradients))

    def do_not_apply_fn():
      # Normally self._optimizer.iterations is incremented in
      # self._optimizer.apply_gradients(). Since that is not called in this
      # branch, we increment it here instead.
      return self._optimizer.iterations.assign_add(1, read_value=False)

    # Note: We must call this cond() in a cross-replica context.
    # DistributionStrategy does not support having a cond in a replica context
    # with a branch that calls `merge_call`, and self._optimizer.apply_gradients
    # calls `merge_call`.
    maybe_apply_op = smart_cond.smart_cond(should_apply_grads, apply_fn,
                                           do_not_apply_fn)
    return tf.group(maybe_apply_op, loss_scale_update_op)
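The smart_cond call above is what allows the gradient application to be skipped when dynamic loss scaling reports non-finite gradients. A standalone sketch of its behaviour (the tensorflow.python.framework.smart_cond import path is an assumption about where the helper lives):

    import tensorflow as tf
    from tensorflow.python.framework import smart_cond  # private API, assumed path

    # When the predicate is statically known (a Python bool or a constant
    # tensor), smart_cond simply calls the matching branch function.
    taken = smart_cond.smart_cond(True, lambda: tf.constant(1), lambda: tf.constant(0))
    skipped = smart_cond.smart_cond(tf.constant(False),
                                    lambda: tf.constant(1), lambda: tf.constant(0))
    print(int(taken), int(skipped))  # 1 0
    # Only when the predicate cannot be resolved statically does it fall back
    # to a regular tf.cond, which is what lets should_apply_grads above be
    # decided at run time.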
Example #4
  def testDofChangeError(self):
    exp = tfb.Exp()
    smc = tfb.SoftmaxCentered()

    # Increase in event-size is the last step. No problems here.
    safe_bij = tfb.Chain([smc, exp],
                         validate_args=True,
                         validate_event_size=True)
    self.evaluate(safe_bij.forward_log_det_jacobian([1., 2., 3.], 1))

    # Increase in event-size before Exp.
    raise_bij = tfb.Chain([exp, smc],
                          validate_args=True,
                          validate_event_size=True)
    with self.assertRaisesRegex((ValueError, tf.errors.InvalidArgumentError),
                                r".+degrees of freedom.+"):
      self.evaluate(raise_bij.forward_log_det_jacobian([1., 2., 3.], 1))

    # When validate_args is False, warns instead of raising.
    warn_bij = tfb.Chain([exp, smc],
                         validate_args=False,
                         validate_event_size=True)
    with mock.patch.object(tf, "print", return_value=tf.no_op()) as mock_print:
      self.evaluate(warn_bij.forward_log_det_jacobian([1., 2., 3.], 1))
      print_args, _ = mock_print.call_args
      self.assertRegex(print_args[0], r"WARNING:.+degrees of freedom")

    # When validate_event_size is False, neither warns nor raises.
    ignore_bij = tfb.Chain([exp, smc], validate_event_size=False)
    self.evaluate(ignore_bij.forward_log_det_jacobian([1., 2., 3.], 1))
Example #5
    def test_increment_global_step(self, use_parameter_scaled_training,
                                   train_steps, maximum_depth, architecture,
                                   expected):
        # Force graph mode
        with tf.compat.v1.Graph().as_default():
            with self.test_session(graph=tf.Graph()) as sess:
                tower_name = "tower"
                if architecture is not None:
                    architecture_utils.set_architecture(architecture,
                                                        tower_name=tower_name)
                spec = self._create_phoenix_spec(problem_type="cnn")
                spec.maximum_depth = maximum_depth
                spec.use_parameter_scaled_training = use_parameter_scaled_training
                instance = phoenix.Phoenix(phoenix_spec=spec,
                                           input_layer_fn=lambda: None,
                                           logits_dimension=0,
                                           study_name="test",
                                           study_owner="test")
                global_step = tf.compat.v1.train.get_or_create_global_step()
                sess.run(tf.compat.v1.global_variables_initializer())
                sess.run(tf.compat.v1.local_variables_initializer())
                before = sess.run(global_step)

                op = instance._increment_global_step(train_op=tf.no_op(),
                                                     train_steps=train_steps,
                                                     tower_name=tower_name)
                sess.run(op)
                after = sess.run(global_step)

                self.assertEqual(before, 0)
                self.assertEqual(after, expected)
Example #6
        def _architecture_metric_fn():
            """Manually creates the tf.metric with a serialized tf.Summary proto."""

            # TODO: Should architecture.subnetworks be sorted by iteration
            # number first? Or perhaps, to make this more general, have one line
            # for each iteration, with "|" as a delimiter if there are multiple
            # subnetworks in one iteration? Something like:
            # 0 linear
            # 1 dnn_width_32_depth_1 | dnn_width_64_depth_1
            # 2
            # 3 dnn_width_32_depth_2
            # Also consider adding the ensemble candidate's name, though that is already
            # included in the ensemble name.
            architecture_ = " | ".join(
                [name for _, name in architecture.subnetworks])
            architecture_ = "| {} |".format(architecture_)
            summary_metadata = tf_compat.v1.SummaryMetadata(
                plugin_data=tf_compat.v1.SummaryMetadata.PluginData(
                    plugin_name="text"))
            summary_proto = tf_compat.v1.summary.Summary()
            summary_proto.value.add(metadata=summary_metadata,
                                    tag="architecture/adanet",
                                    tensor=tf_compat.v1.make_tensor_proto(
                                        architecture_, dtype=tf.string))
            architecture_summary = tf.convert_to_tensor(
                value=summary_proto.SerializeToString(), name="architecture")

            return {
                "architecture/adanet/ensembles":
                (architecture_summary, tf.no_op())
            }
Example #7
 def _decay_weights_op(self, var, learning_rate, apply_state):
     do_decay = self._do_use_weight_decay(var.name)
     if do_decay:
         return var.assign_sub(
             learning_rate * var *
             apply_state[(var.device,
                          var.dtype.base_dtype)]['weight_decay_rate'],
             use_locking=self._use_locking)
     return tf.no_op()
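For illustration, a tiny numeric sketch (toy values, not from the source) of the decoupled weight-decay step performed above, var <- var - learning_rate * weight_decay_rate * var:

    import tensorflow as tf

    var = tf.Variable([1.0, -2.0])
    learning_rate = 0.1
    weight_decay_rate = 0.01
    var.assign_sub(learning_rate * weight_decay_rate * var)
    print(var.numpy())  # [ 0.999 -1.998]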
Example #8
    def build_mixture_weights_train_op(self, loss, var_list, logits, labels,
                                       iteration_step, summary):
        """See `adanet.subnetwork.Builder`."""

        if not self._learn_mixture_weights:
            return tf.no_op("mixture_weights_train_op")

        # NOTE: The `adanet.Estimator` increments the global step.
        return self._optimizer.minimize(loss=loss, var_list=var_list)
Example #9
    def _resource_apply_dense(self, grad, param, apply_state=None):
        if grad is None or param is None:
            return tf.no_op()

        var_device, var_dtype = param.device, param.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
                        self._fallback_apply_state(var_device, var_dtype))
        learning_rate = coefficients["lr_t"]

        param_name = param.name

        v = self.get_slot(param, "Momentum")

        if self._use_weight_decay(param_name):
            grad += self.weight_decay * param

        if self.classic_momentum:
            trust_ratio = 1.0
            if self._do_layer_adaptation(param_name):
                w_norm = tf.norm(param, ord=2)
                g_norm = tf.norm(grad, ord=2)
                trust_ratio = tf.where(
                    tf.greater(w_norm, 0),
                    tf.where(tf.greater(g_norm, 0), (self.eeta * w_norm / g_norm), 1.0),
                    1.0)
            scaled_lr = learning_rate * trust_ratio

            next_v = tf.multiply(self.momentum, v) + scaled_lr * grad
            if self.use_nesterov:
                update = tf.multiply(self.momentum, next_v) + scaled_lr * grad
            else:
                update = next_v
            next_param = param - update
        else:
            next_v = tf.multiply(self.momentum, v) + grad
            if self.use_nesterov:
                update = tf.multiply(self.momentum, next_v) + grad
            else:
                update = next_v

            trust_ratio = 1.0
            if self._do_layer_adaptation(param_name):
                w_norm = tf.norm(param, ord=2)
                v_norm = tf.norm(update, ord=2)
                trust_ratio = tf.where(
                    tf.greater(w_norm, 0),
                    tf.where(tf.greater(v_norm, 0), (self.eeta * w_norm / v_norm), 1.0),
                    1.0)
            scaled_lr = trust_ratio * learning_rate
            next_param = param - scaled_lr * update

        return tf.group(*[
            param.assign(next_param, use_locking=False),
            v.assign(next_v, use_locking=False)
        ])
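The classic-momentum branch above applies a LARS-style layer adaptation: the learning rate is rescaled by eeta * ||w|| / ||g|| whenever both norms are positive. A standalone numeric sketch with made-up values:

    import tensorflow as tf

    eeta = 0.001
    param = tf.constant([3.0, 4.0])  # ||w|| = 5
    grad = tf.constant([0.6, 0.8])   # ||g|| = 1
    w_norm = tf.norm(param, ord=2)
    g_norm = tf.norm(grad, ord=2)
    trust_ratio = tf.where(
        tf.greater(w_norm, 0),
        tf.where(tf.greater(g_norm, 0), eeta * w_norm / g_norm, 1.0),
        1.0)
    print(float(trust_ratio))  # 0.005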
Example #10
    def _create_host_call(self, current_iteration, training):
        """Construct a host_call writing scalar summaries.

    Args:
      current_iteration: The current `_Iteration`.
      training: Boolean indicating whether in training mode.

    Returns:
      (fn, args) Pair to be called by TPUEstimator as the host_call.
    """

        if not training:
            return lambda **kwargs: [tf.no_op()], {}

        # Collect and flatten summary functions and arguments.
        summary_kwargs = collections.OrderedDict()
        gs_t = tf.reshape(tf.cast(tf.train.get_global_step(), dtype=tf.int32),
                          [1])
        summary_kwargs["global_step"] = gs_t

        summary_fns = collections.defaultdict(list)
        for i, summary in enumerate(current_iteration.summaries):
            for j, (summary_fn, tensor) in enumerate(summary.summary_tuples()):
                summary_fns[i].append(summary_fn)
                summary_kwargs["summary_{}_{}".format(i, j)] = tensor

        def _host_call_fn(**kwargs):
            """Training host call.

      Creates summaries for training metrics.

      Args:
        **kwargs: Dict of {str: Tensor}, with `Tensor` of shape `[batch]`. Must
          contain the key "global_step" whose value is the current global_step
          Tensor.

      Returns:
        List of summary ops to run on the CPU host.
      """

            from tensorflow.python.ops import summary_ops_v2  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top

            gs = tf.cast(kwargs.pop("global_step")[0], dtype=tf.int64)
            for i, summary in enumerate(current_iteration.summaries):
                with summary_ops_v2.create_file_writer(
                        summary.logdir).as_default():
                    with summary_ops_v2.record_summaries_every_n_global_steps(
                            n=self.config.save_summary_steps, global_step=gs):
                        for j, summary_fn in enumerate(summary_fns[i]):
                            tensor = kwargs["summary_{}_{}".format(i, j)]
                            summary_fn(tensor, step=gs)
                summary.clear_summary_tuples()
            return tf.compat.v1.summary.all_v2_summary_ops()

        return _host_call_fn, summary_kwargs
Example #11
def _create_estimator_spec(head, features, labels, mode, logits, use_tpu):
    """Creates the head's EstimatorSpec or TPUEstimatorSpec on TPU."""

    if use_tpu:
        create_spec_fn = head._create_tpu_estimator_spec  # pylint: disable=protected-access
    else:
        create_spec_fn = head.create_estimator_spec
    return create_spec_fn(features=features,
                          labels=labels,
                          mode=mode,
                          logits=logits,
                          train_op_fn=lambda _: tf.no_op())
Example #12
        def _best_eval_metrics_fn(*args):
            """Returns the best eval metrics."""

            with tf_compat.v1.variable_scope("best_eval_metrics"):
                args = list(args)
                idx, idx_update_op = tf_compat.v1.metrics.mean(args.pop())
                idx = tf.cast(idx, tf.int32)
                metric_fns = self._candidates_eval_metrics_store.metric_fns
                metric_fn_args = self._candidates_eval_metrics_store.pack_args(
                    args[:len(candidate_args)])
                candidate_grouped_metrics = self._group_metric_ops(
                    metric_fns, metric_fn_args)

                metric_fns = self._subnetworks_eval_metrics_store.metric_fns
                metric_fn_args = self._subnetworks_eval_metrics_store.pack_args(
                    args[(len(args) - len(subnetwork_args)):])
                subnetwork_grouped_metrics = self._group_metric_ops(
                    metric_fns, metric_fn_args)

                eval_metric_ops = {}
                for metric_name in sorted(candidate_grouped_metrics):
                    metric_ops = candidate_grouped_metrics[metric_name]
                    if len(metric_ops) != len(self._candidates):
                        continue
                    if metric_name == "loss":
                        continue
                    values, ops = list(six.moves.zip(*metric_ops))
                    best_value = tf.stack(values)[idx]
                    # All tensors in this function have been outfed from the TPU, so we
                    # must update them manually; otherwise the TPU will hang indefinitely,
                    # waiting for the value of idx to update.
                    ops = list(ops)
                    ops.append(idx_update_op)
                    # Bundle subnetwork eval metric ops and ensemble "loss" ops (which
                    # is a restricted Estimator keyword) into the other metric ops so
                    # that they are computed.
                    ensemble_loss_ops = candidate_grouped_metrics.get(
                        "loss", tf.no_op())
                    all_ops = tf.group(ops, ensemble_loss_ops,
                                       subnetwork_grouped_metrics)
                    eval_metric_ops[metric_name] = (best_value, all_ops)
                iteration_number = tf.constant(self._iteration_number)
                eval_metric_ops["iteration"] = (iteration_number,
                                                iteration_number)

                if self._replay_indices_for_all:
                    _replay_eval_metrics(idx, eval_metric_ops)

                # tf.estimator.Estimator does not allow a "loss" key to be present in
                # its eval_metrics.
                assert "loss" not in eval_metric_ops
                return eval_metric_ops
Example #13
    def build_train_op(self, ensemble, loss, var_list, labels, iteration_step,
                       summary, previous_ensemble):
        del labels, iteration_step, summary, previous_ensemble  # unused
        optimizer = self._optimizer
        if callable(optimizer):
            optimizer = optimizer()
        if optimizer is None:
            return tf.no_op()

        # The AdaNet Estimator is responsible for incrementing the global step.
        return optimizer.minimize(loss=loss +
                                  ensemble.complexity_regularization,
                                  var_list=var_list)
Example #14
    def update_partial(self, policy, tau=1.0):
        """Update the current policy with another policy.

    This would include copying the variables from the other policy.

    Args:
      policy: Another policy it can update from.
      tau: A float scalar in [0, 1]. When tau is 1.0 (the default), we do a hard
        update. This is used for trainable variables.

    Returns:
      A TF op to do the update.
    """
        if self.variables():
            policy_vars = policy.variables()
            return common.soft_variables_update(
                policy_vars,
                self.variables()[:len(policy_vars)],
                tau=tau,
                tau_non_trainable=None,
                sort_variables_by_name=True)
        else:
            return tf.no_op()
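A numeric sketch (not from the source) of the per-variable soft update that common.soft_variables_update is understood to perform, target <- (1 - tau) * target + tau * source:

    import tensorflow as tf

    tau = 0.1
    source = tf.constant([1.0, 1.0])
    target = tf.Variable([0.0, 2.0])
    target.assign((1.0 - tau) * target + tau * source)
    print(target.numpy())  # [0.1 1.9]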
Example #15
        def _mixture_weights_train_op_fn(loss, var_list):
            self.assertLen(var_list, want_ensemble_trainable_vars)
            self.assertEqual(
                var_list,
                tf_compat.v1.get_collection(
                    tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES))
            # Subnetworks get iteration steps instead of global steps.
            self.assertEqual("ensemble_test/iteration_step",
                             tf_compat.v1.train.get_global_step().op.name)

            # Subnetworks get scoped summaries.
            self.assertEqual("fake_scalar",
                             tf_compat.v1.summary.scalar("scalar", 1.))
            self.assertEqual("fake_image",
                             tf_compat.v1.summary.image("image", 1.))
            self.assertEqual("fake_histogram",
                             tf_compat.v1.summary.histogram("histogram", 1.))
            self.assertEqual("fake_audio",
                             tf_compat.v1.summary.audio("audio", 1., 1.))
            if not var_list:
                return tf.no_op()
            optimizer = tf_compat.v1.train.GradientDescentOptimizer(
                learning_rate=.1)
            return optimizer.minimize(loss, var_list=var_list)
Example #16
 def _assert_non_singular(self):
     return tf.no_op('assert_non_singular')
Example #17
 def _if_should_apply_grads(grads):
     if isinstance(self._loss_scale, _DynamicLossScaleState):
         return self._loss_scale.update(grads)
     else:
         return (tf.no_op(), True)
Example #18
 def test_build_train_op_no_op(self):
   with context.graph_mode():
     train_op = ensemble.ComplexityRegularizedEnsembler().build_train_op(
         *[None] * 7)  # arguments unused
     self.assertEqual(train_op.type, tf.no_op().type)
Example #19
  def train_step(self, initial_env_step: dataset_lib.EnvStep,
                 experience: dataset_lib.EnvStep,
                 target_policy: tf_policy.TFPolicy):
    """Performs a single training step based on batch.

    Args:
      initial_env_step: A batch of initial steps.
      experience: A batch of transitions. Elements must have shape [batch_size,
        2, ...].
      target_policy: The policy whose value we want to estimate.

    Returns:
      The losses and the train op.
    """
    env_step = tf.nest.map_structure(lambda t: t[:, 0, ...], experience)
    next_env_step = tf.nest.map_structure(lambda t: t[:, 1, ...], experience)

    with tf.GradientTape(
        watch_accessed_variables=False, persistent=True) as tape:
      tape.watch(self._nu_network.variables)
      tape.watch(self._zeta_network.variables)
      tape.watch(self._weight_network.variables)
      tape.watch([self._alpha])
      nu_loss, zeta_loss = self.train_loss(initial_env_step, env_step,
                                           next_env_step, target_policy)

      nu_reg = self._nu_regularizer * self._orthogonal_regularization(
          self._nu_network)
      zeta_reg = self._zeta_regularizer * self._orthogonal_regularization(
          self._zeta_network)

      # Binary search to find best alpha.
      left = self._alpha - 1 * tf.ones_like(self._two_sided_limit)
      right = self._alpha + 1 * tf.ones_like(self._two_sided_limit)
      for _ in range(4):
        mid = 0.5 * (left + right)
        weights, log_weights = self._get_weights(
            initial_env_step, env_step, next_env_step, nu_loss, alpha=mid)
        divergence = self._compute_divergence(weights, log_weights)
        divergence_violation = divergence - self._two_sided_limit
        left = tf.where(divergence_violation > 0., mid, left)
        right = tf.where(divergence_violation > 0., right, mid)
      best_alpha = 0.5 * (left + right)
      self._alpha.assign(0.05 * best_alpha + 0.95 * self._alpha)

      weights, log_weights = self._get_weights(initial_env_step, env_step,
                                               next_env_step, nu_loss)
      divergence = self._compute_divergence(weights, log_weights)
      divergence_violation = divergence - self._two_sided_limit

      weighted_nu_loss = tf.reshape(
          nu_loss, [-1, self._num_limits, 2]) * weights[:, :, None]
      weighted_zeta_loss = tf.reshape(
          zeta_loss, [-1, self._num_limits, 2]) * weights[:, :, None]

      # Multiplier to make all weight optimizations minimizations.
      # Takes into account that the sign of algae_alpha determines whether
      # nu_loss has switched signs (since all of nu_loss is used for minimization).
      weight_loss_multiplier = self._algae_alpha_sign * tf.concat(
          2 * [tf.ones_like(self._divergence_limit)] +
          2 * [-tf.ones_like(self._divergence_limit)],
          axis=-1)
      weight_loss = tf.reduce_mean(
          tf.reshape(weight_loss_multiplier * nu_loss,
                     [-1, self._num_limits, 2]), 1)
      weight_loss += tf.exp(self._alpha) * divergence_violation

      reg_weighted_nu_loss = (weighted_nu_loss + nu_reg)
      reg_weighted_zeta_loss = (weighted_zeta_loss + zeta_reg)

      alpha_loss = (-tf.exp(self._alpha) *
                    tf.stop_gradient(divergence_violation))

    nu_grads = tape.gradient(reg_weighted_nu_loss, self._nu_network.variables)
    nu_grad_op = self._nu_optimizer.apply_gradients(
        zip(nu_grads, self._nu_network.variables))

    zeta_grads = tape.gradient(reg_weighted_zeta_loss,
                               self._zeta_network.variables)
    zeta_grad_op = self._zeta_optimizer.apply_gradients(
        zip(zeta_grads, self._zeta_network.variables))

    if not self._closed_form_weights:
      weight_grads = tape.gradient(weight_loss, self._weight_network.variables)
      weight_grad_op = self._weight_optimizer.apply_gradients(
          zip(weight_grads, self._weight_network.variables))
    else:
      weight_grad_op = tf.group()

    alpha_grads = tape.gradient(alpha_loss, [self._alpha])
    #alpha_grad_op = self._alpha_optimizer.apply_gradients(
    #    zip(alpha_grads, [self._alpha]))
    alpha_grad_op = tf.no_op()

    for idx in range(self._num_limits):
      tf.summary.scalar('divergence%d' % idx, divergence[idx])
      tf.summary.scalar('nu_loss%d' % idx, tf.reduce_mean(nu_loss, 0)[idx])
      tf.summary.scalar('zeta_loss%d' % idx, tf.reduce_mean(zeta_loss, 0)[idx])
      tf.summary.scalar('exp_alpha%d' % idx, tf.exp(self._alpha[idx]))
      tf.summary.histogram('weights%d' % idx, weights[:, idx])

    estimate = tf.reduce_mean(
        weighted_nu_loss *
        tf.reshape(self._algae_alpha_sign, [self._num_limits, 2]),
        axis=[0, -1])
    return ((estimate, tf.reshape(tf.reduce_mean(weighted_nu_loss, [0]), [-1]),
             tf.reshape(tf.reduce_mean(weighted_zeta_loss, [0]),
                        [-1]), tf.reduce_mean(weight_loss,
                                              0), alpha_loss, divergence),
            tf.group(nu_grad_op, zeta_grad_op, weight_grad_op, alpha_grad_op))
Example #20
    def _make_metrics(self,
                      metric_fn,
                      mode=tf.estimator.ModeKeys.EVAL,
                      multi_head=False,
                      sess=None):

        with context.graph_mode():
            if multi_head:
                head = multi_head_lib.MultiHead(heads=[
                    binary_class_head.BinaryClassHead(
                        name="head1", loss_reduction=tf_compat.SUM),
                    binary_class_head.BinaryClassHead(
                        name="head2", loss_reduction=tf_compat.SUM)
                ])
                labels = {
                    "head1": tf.constant([0, 1]),
                    "head2": tf.constant([0, 1])
                }
            else:
                head = binary_class_head.BinaryClassHead(
                    loss_reduction=tf_compat.SUM)
                labels = tf.constant([0, 1])
            features = {"x": tf.constant([[1.], [2.]])}
            builder = _EnsembleBuilder(head, metric_fn=metric_fn)
            subnetwork_manager = _SubnetworkManager(head, metric_fn=metric_fn)
            subnetwork_builder = _Builder(lambda unused0, unused1: tf.no_op(),
                                          lambda unused0, unused1: tf.no_op(),
                                          use_logits_last_layer=True)

            subnetwork_spec = subnetwork_manager.build_subnetwork_spec(
                name="test",
                subnetwork_builder=subnetwork_builder,
                summary=_FakeSummary(),
                features=features,
                mode=mode,
                labels=labels)
            ensemble_spec = builder.build_ensemble_spec(
                name="test",
                candidate=EnsembleCandidate("foo", [subnetwork_builder], None),
                ensembler=ComplexityRegularizedEnsembler(
                    mixture_weight_type=MixtureWeightType.SCALAR),
                subnetwork_specs=[subnetwork_spec],
                summary=_FakeSummary(),
                features=features,
                iteration_number=0,
                labels=labels,
                mode=mode,
                my_ensemble_index=0,
                previous_ensemble_spec=None,
                previous_iteration_checkpoint=None)
            subnetwork_metric_ops = subnetwork_spec.eval_metrics.eval_metrics_ops(
            )
            ensemble_metric_ops = ensemble_spec.eval_metrics.eval_metrics_ops()
            evaluate = self.evaluate
            if sess is not None:
                evaluate = sess.run
            evaluate((tf_compat.v1.global_variables_initializer(),
                      tf_compat.v1.local_variables_initializer()))
            evaluate((subnetwork_metric_ops, ensemble_metric_ops))
            # Return the idempotent tensor part of the (tensor, op) metrics tuple.
            return {
                k: evaluate(subnetwork_metric_ops[k][0])
                for k in subnetwork_metric_ops
            }, {
                k: evaluate(ensemble_metric_ops[k][0])
                for k in ensemble_metric_ops
            }
Example #21
class IterationBuilderTest(tu.AdanetTestCase):

  @parameterized.named_parameters(
      {
          "testcase_name": "negative_max_steps",
          "max_steps": -1,
      }, {
          "testcase_name": "zero_max_steps",
          "max_steps": 0,
      })
  @test_util.run_in_graph_and_eager_modes
  def test_init_errors(self, max_steps):
    with self.assertRaises(ValueError):
      _IterationBuilder(
          _FakeCandidateBuilder(),
          _FakeSubnetworkManager(),
          _FakeEnsembleBuilder(),
          summary_maker=_ScopedSummary,
          ensemblers=[_FakeEnsembler()],
          max_steps=max_steps)

  # pylint: disable=g-long-lambda
  @parameterized.named_parameters(
      {
          "testcase_name": "single_subnetwork_fn",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name":
              "single_subnetwork_fn_mock_summary",
          "ensemble_builder":
              _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "summary_maker":
              functools.partial(_TPUScopedSummary, logdir="/tmp/fakedir"),
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_loss":
              1.403943,
          "want_predictions":
              2.129,
          "want_best_candidate_index":
              0,
      },
      {
          "testcase_name":
              "single_subnetwork_with_eval_metrics",
          "ensemble_builder":
              _FakeEnsembleBuilder(eval_metric_ops_fn=lambda:
                                   {"a": (tf.constant(1), tf.constant(2))}),
          "subnetwork_builders": [_FakeBuilder("training",),],
          "mode":
              tf.estimator.ModeKeys.EVAL,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_loss":
              1.403943,
          "want_predictions":
              2.129,
          "want_eval_metric_ops": ["a", "iteration"],
          "want_best_candidate_index":
              0,
      },
      {
          "testcase_name":
              "single_subnetwork_with_non_tensor_eval_metric_op",
          "ensemble_builder":
              _FakeEnsembleBuilder(eval_metric_ops_fn=lambda:
                                   {"a": (tf.constant(1), tf.no_op())}),
          "subnetwork_builders": [_FakeBuilder("training",),],
          "mode":
              tf.estimator.ModeKeys.EVAL,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_loss":
              1.403943,
          "want_predictions":
              2.129,
          "want_eval_metric_ops": ["a", "iteration"],
          "want_best_candidate_index":
              0,
      },
      {
          "testcase_name": "single_subnetwork_done_training_fn",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("done")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name": "single_dict_predictions_subnetwork_fn",
          "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name": "previous_ensemble",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "previous_iteration":
              lambda: _FakeIteration(
                  tu.dummy_ensemble_spec("old", variables=[tf.Variable(1.)])),
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 1,
      },
      {
          "testcase_name":
              "previous_ensemble_is_best",
          "ensemble_builder":
              _FakeEnsembleBuilder(),
          "subnetwork_builders": [_FakeBuilder("training")],
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "previous_iteration":
              lambda: _FakeIteration(
                  tu.dummy_ensemble_spec(
                      "old", random_seed=12, variables=[tf.Variable(1.)])),
          "want_loss":
              -.437,
          "want_predictions":
              .688,
          "want_best_candidate_index":
              0,
      },
      {
          "testcase_name":
              "previous_ensemble_spec_and_eval_metrics",
          "ensemble_builder":
              _FakeEnsembleBuilder(eval_metric_ops_fn=lambda:
                                   {"a": (tf.constant(1), tf.constant(2))}),
          "subnetwork_builders": [_FakeBuilder("training")],
          "mode":
              tf.estimator.ModeKeys.EVAL,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "previous_iteration":
              lambda: _FakeIteration(
                  tu.dummy_ensemble_spec(
                      "old",
                      eval_metrics=tu.create_ensemble_metrics(
                          metric_fn=lambda:
                          {"a": (tf.constant(1), tf.constant(2))}),
                      variables=[tf.Variable(1.)])),
          "want_loss":
              1.403943,
          "want_predictions":
              2.129,
          "want_eval_metric_ops": ["a", "iteration"],
          "want_best_candidate_index":
              1,
      },
      {
          "testcase_name": "two_subnetwork_fns",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.40394,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name": "two_subnetwork_fns_other_best",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=12)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": -.437,
          "want_predictions": .688,
          "want_best_candidate_index": 1,
      },
      {
          "testcase_name": "two_subnetwork_one_training_fns",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders":
              [_FakeBuilder("training"),
               _FakeBuilder("done", random_seed=7)],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name": "two_subnetwork_done_training_fns",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders":
              [_FakeBuilder("done"),
               _FakeBuilder("done1", random_seed=7)],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name": "two_dict_predictions_subnetwork_fns",
          "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.404,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index": 0,
      },
      {
          "testcase_name":
              "two_dict_predictions_subnetwork_fns_predict_classes",
          "ensemble_builder":
              _FakeEnsembleBuilder(
                  dict_predictions=True,
                  export_output_key=tu.ExportOutputKeys.CLASSIFICATION_CLASSES),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode":
              tf.estimator.ModeKeys.PREDICT,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_loss":
              1.404,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index":
              0,
          "want_export_outputs": {
              tu.ExportOutputKeys.CLASSIFICATION_CLASSES: [2.129],
              "serving_default": [2.129],
          },
      },
      {
          "testcase_name":
              "two_dict_predictions_subnetwork_fns_predict_scores",
          "ensemble_builder":
              _FakeEnsembleBuilder(
                  dict_predictions=True,
                  export_output_key=tu.ExportOutputKeys.CLASSIFICATION_SCORES),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode":
              tf.estimator.ModeKeys.PREDICT,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_loss":
              1.404,
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index":
              0,
          "want_export_outputs": {
              tu.ExportOutputKeys.CLASSIFICATION_SCORES: [2.129],
              "serving_default": [2.129],
          },
      },
      {
          "testcase_name":
              "two_dict_predictions_subnetwork_fns_predict_regression",
          "ensemble_builder":
              _FakeEnsembleBuilder(
                  dict_predictions=True,
                  export_output_key=tu.ExportOutputKeys.REGRESSION),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode":
              tf.estimator.ModeKeys.PREDICT,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index":
              0,
          "want_export_outputs": {
              tu.ExportOutputKeys.REGRESSION: 2.129,
              "serving_default": 2.129,
          },
      },
      {
          "testcase_name":
              "two_dict_predictions_subnetwork_fns_predict_prediction",
          "ensemble_builder":
              _FakeEnsembleBuilder(
                  dict_predictions=True,
                  export_output_key=tu.ExportOutputKeys.PREDICTION),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode":
              tf.estimator.ModeKeys.PREDICT,
          "features":
              lambda: [[1., -1., 0.]],
          "labels":
              lambda: [1],
          "want_predictions": {
              "classes": 2,
              "logits": 2.129
          },
          "want_best_candidate_index":
              0,
          "want_export_outputs": {
              tu.ExportOutputKeys.PREDICTION: {
                  "classes": 2,
                  "logits": 2.129
              },
              "serving_default": {
                  "classes": 2,
                  "logits": 2.129
              },
          },
      },
      {
          "testcase_name": "chief_session_run_hook",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [
              _FakeBuilder("training", chief_hook=tu.ModifierSessionRunHook())
          ],
          "features": lambda: [[1., -1., 0.]],
          "labels": lambda: [1],
          "want_loss": 1.403943,
          "want_predictions": 2.129,
          "want_best_candidate_index": 0,
          "want_chief_hooks": True,
      })
  @test_util.run_in_graph_and_eager_modes
  def test_build_iteration(self,
                           ensemble_builder,
                           subnetwork_builders,
                           features,
                           labels,
                           want_predictions,
                           want_best_candidate_index,
                           want_eval_metric_ops=(),
                           previous_iteration=None,
                           want_loss=None,
                           want_export_outputs=None,
                           mode=tf.estimator.ModeKeys.TRAIN,
                           summary_maker=_ScopedSummary,
                           want_chief_hooks=False):
    with context.graph_mode():
      tf_compat.v1.train.create_global_step()
      builder = _IterationBuilder(
          _FakeCandidateBuilder(),
          _FakeSubnetworkManager(),
          ensemble_builder,
          summary_maker=summary_maker,
          ensemblers=[_FakeEnsembler()],
          max_steps=1)
      iteration = builder.build_iteration(
          base_global_step=0,
          iteration_number=0,
          ensemble_candidates=[
              EnsembleCandidate(b.name, [b], None) for b in subnetwork_builders
          ],
          previous_iteration=previous_iteration()
          if previous_iteration else None,
          subnetwork_builders=subnetwork_builders,
          features=features(),
          labels=labels(),
          mode=mode,
          config=tf.estimator.RunConfig(model_dir=self.test_subdirectory))
      init = tf.group(tf_compat.v1.global_variables_initializer(),
                      tf_compat.v1.local_variables_initializer())
      self.evaluate(init)
      estimator_spec = iteration.estimator_spec
      if want_chief_hooks:
        self.assertNotEmpty(iteration.estimator_spec.training_chief_hooks)
      self.assertAllClose(
          want_predictions,
          self.evaluate(estimator_spec.predictions),
          atol=1e-3)
      # A default architecture metric is always included, even if we don't
      # specify one.
      eval_metric_ops = estimator_spec.eval_metric_ops
      if "architecture/adanet/ensembles" in eval_metric_ops:
        del eval_metric_ops["architecture/adanet/ensembles"]
      self.assertEqual(set(want_eval_metric_ops), set(eval_metric_ops.keys()))

      self.assertEqual(want_best_candidate_index,
                       self.evaluate(iteration.best_candidate_index))
      if mode == tf.estimator.ModeKeys.PREDICT:
        self.assertIsNotNone(estimator_spec.export_outputs)
        self.assertAllClose(
            want_export_outputs,
            self.evaluate(
                _export_output_tensors(estimator_spec.export_outputs)),
            atol=1e-3)
        self.assertIsNone(iteration.estimator_spec.train_op)
        self.assertIsNone(iteration.estimator_spec.loss)
        self.assertIsNotNone(want_export_outputs)
        return

      self.assertAlmostEqual(
          want_loss, self.evaluate(iteration.estimator_spec.loss), places=3)
      self.assertIsNone(iteration.estimator_spec.export_outputs)
      if mode == tf.estimator.ModeKeys.TRAIN:
        self.evaluate(iteration.estimator_spec.train_op)

  @parameterized.named_parameters(
      {
          "testcase_name": "empty_subnetwork_builders",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders": [],
          "want_raises": ValueError,
      }, {
          "testcase_name": "same_subnetwork_builder_names",
          "ensemble_builder": _FakeEnsembleBuilder(),
          "subnetwork_builders":
              [_FakeBuilder("same_name"),
               _FakeBuilder("same_name")],
          "want_raises": ValueError,
      }, {
          "testcase_name":
              "same_ensembler_names",
          "ensemble_builder":
              _FakeEnsembleBuilder(),
          "multiple_candidates": True,
          "subnetwork_builders": [_FakeBuilder("fake_builder_name")],
          "want_raises":
              ValueError,
      }, {
          "testcase_name":
              "predict_invalid",
          "ensemble_builder":
              _FakeEnsembleBuilder(
                  dict_predictions=True,
                  export_output_key=tu.ExportOutputKeys.INVALID),
          "subnetwork_builders": [
              _FakeBuilder("training"),
              _FakeBuilder("training2", random_seed=7)
          ],
          "mode":
              tf.estimator.ModeKeys.PREDICT,
          "want_raises":
              TypeError,
      })
  @test_util.run_in_graph_and_eager_modes
  def test_build_iteration_error(self,
                                 ensemble_builder,
                                 subnetwork_builders,
                                 want_raises,
                                 multiple_candidates=False,
                                 mode=tf.estimator.ModeKeys.TRAIN,
                                 summary_maker=_ScopedSummary):
    with context.graph_mode():
      tf_compat.v1.train.create_global_step()
      builder = _IterationBuilder(
          _FakeCandidateBuilder(),
          _FakeSubnetworkManager(),
          ensemble_builder,
          summary_maker=summary_maker,
          ensemblers=[_FakeEnsembler()],
          max_steps=100)
      features = [[1., -1., 0.]]
      labels = [1]
      ensemble_candidates = [
          EnsembleCandidate("test", subnetwork_builders, None)
      ]
      if multiple_candidates:
        ensemble_candidates += [
            EnsembleCandidate("test", subnetwork_builders, None)
        ]
      with self.assertRaises(want_raises):
        builder.build_iteration(
            base_global_step=0,
            iteration_number=0,
            ensemble_candidates=ensemble_candidates,
            subnetwork_builders=subnetwork_builders,
            features=features,
            labels=labels,
            mode=mode,
            config=tf.estimator.RunConfig(model_dir=self.test_subdirectory))
Example #22
 def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                               iteration_step, summary, previous_ensemble):
   if self._chief_hook:
     return TrainOpSpec(
         train_op=tf.no_op(), chief_hooks=[self._chief_hook], hooks=None)
   return None
Example #23
class ReportMaterializerTest(parameterized.TestCase, tf.test.TestCase):

  # pylint: disable=g-long-lambda
  @parameterized.named_parameters(
      {
          "testcase_name":
              "one_empty_subnetwork",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo":
                      subnetwork.Report(hparams={}, attributes={}, metrics={}),
              },
          "steps":
              3,
          "included_subnetwork_names": ["foo"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "one_subnetwork",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo":
                      subnetwork.Report(
                          hparams={
                              "learning_rate": 1.e-5,
                              "optimizer": "sgd",
                              "num_layers": 0,
                              "use_side_inputs": True,
                          },
                          attributes={
                              "weight_norms": tf.constant(3.14),
                              "foo": tf.constant("bar"),
                              "parameters": tf.constant(7777),
                              "boo": tf.constant(True),
                          },
                          metrics={},
                      ),
              },
          "steps":
              3,
          "included_subnetwork_names": ["foo"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.14,
                      "foo": "bar",
                      "parameters": 7777,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "one_subnetwork_iteration_2",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo":
                      subnetwork.Report(
                          hparams={
                              "learning_rate": 1.e-5,
                              "optimizer": "sgd",
                              "num_layers": 0,
                              "use_side_inputs": True,
                          },
                          attributes={
                              "weight_norms": tf.constant(3.14),
                              "foo": tf.constant("bar"),
                              "parameters": tf.constant(7777),
                              "boo": tf.constant(True),
                          },
                          metrics={},
                      ),
              },
          "steps":
              3,
          "iteration_number":
              2,
          "included_subnetwork_names": ["foo"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=2,
                  name="foo",
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.14,
                      "foo": "bar",
                      "parameters": 7777,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "two_subnetworks",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo1":
                      subnetwork.Report(
                          hparams={
                              "learning_rate": 1.e-5,
                              "optimizer": "sgd",
                              "num_layers": 0,
                              "use_side_inputs": True,
                          },
                          attributes={
                              "weight_norms": tf.constant(3.14),
                              "foo": tf.constant("bar"),
                              "parameters": tf.constant(7777),
                              "boo": tf.constant(True),
                          },
                          metrics={},
                      ),
                  "foo2":
                      subnetwork.Report(
                          hparams={
                              "learning_rate": 1.e-6,
                              "optimizer": "sgd",
                              "num_layers": 1,
                              "use_side_inputs": True,
                          },
                          attributes={
                              "weight_norms": tf.constant(3.1445),
                              "foo": tf.constant("baz"),
                              "parameters": tf.constant(7788),
                              "boo": tf.constant(True),
                          },
                          metrics={},
                      ),
              },
          "steps":
              3,
          "included_subnetwork_names": ["foo2"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo1",
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.14,
                      "foo": "bar",
                      "parameters": 7777,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=False,
              ),
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo2",
                  hparams={
                      "learning_rate": 1.e-6,
                      "optimizer": "sgd",
                      "num_layers": 1,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.1445,
                      "foo": "baz",
                      "parameters": 7788,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "two_subnetworks_zero_included",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo1":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={},
                      ),
                  "foo2":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={},
                      ),
              },
          "steps":
              3,
          "included_subnetwork_names": [],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo1",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=False,
              ),
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo2",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=False,
              ),
          ],
      }, {
          "testcase_name":
              "two_subnetworks_both_included",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo1":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={},
                      ),
                  "foo2":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={},
                      ),
              },
          "steps":
              3,
          "included_subnetwork_names": ["foo1", "foo2"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo1",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=True,
              ),
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo2",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "materialize_metrics",
          "input_fn":
              tu.dummy_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                [[1.], [2.], [3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={"moo": tf_compat.v1.metrics.mean(labels)},
                      ),
              },
          "steps":
              3,
          "included_subnetwork_names": ["foo"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={"moo": 2.},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "materialize_metrics_none_steps",
          "input_fn":
              tu.dataset_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                  [[1.], [2.], [3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={"moo": tf_compat.v1.metrics.mean(labels)},
                      ),
              },
          "steps":
              None,
          "included_subnetwork_names": ["foo"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={"moo": 2.},
                  included_in_final_ensemble=True,
              ),
          ],
      }, {
          "testcase_name":
              "materialize_metrics_non_tensor_op",
          "input_fn":
              tu.dummy_input_fn([[1., 2]], [[3.]]),
          "subnetwork_reports_fn":
              lambda features, labels: {
                  "foo":
                      subnetwork.Report(
                          hparams={},
                          attributes={},
                          metrics={"moo": (tf.constant(42), tf.no_op())},
                      ),
              },
          "steps":
              3,
          "included_subnetwork_names": ["foo"],
          "want_materialized_reports": [
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={"moo": 42},
                  included_in_final_ensemble=True,
              ),
          ],
      })
  @test_util.run_in_graph_and_eager_modes
  def test_materialize_subnetwork_reports(self,
                                          input_fn,
                                          subnetwork_reports_fn,
                                          steps,
                                          iteration_number=0,
                                          included_subnetwork_names=None,
                                          want_materialized_reports=None):
    with context.graph_mode():
      tf.constant(0.)  # dummy op so that the session graph is never empty.
      features, labels = input_fn()
      subnetwork_reports = subnetwork_reports_fn(features, labels)
      with self.test_session() as sess:
        sess.run(tf_compat.v1.initializers.local_variables())
        report_materializer = ReportMaterializer(input_fn=input_fn, steps=steps)
        materialized_reports = (
            report_materializer.materialize_subnetwork_reports(
                sess, iteration_number, subnetwork_reports,
                included_subnetwork_names))
        self.assertEqual(
            len(want_materialized_reports), len(materialized_reports))
        materialized_reports_dict = {
            blrm.name: blrm for blrm in materialized_reports
        }
        for want_materialized_report in want_materialized_reports:
          materialized_report = (
              materialized_reports_dict[want_materialized_report.name])
          self.assertEqual(iteration_number,
                           materialized_report.iteration_number)
          self.assertEqual(
              set(want_materialized_report.hparams.keys()),
              set(materialized_report.hparams.keys()))
          for hparam_key, want_hparam in (
              want_materialized_report.hparams.items()):
            if isinstance(want_hparam, float):
              self.assertAllClose(want_hparam,
                                  materialized_report.hparams[hparam_key])
            else:
              self.assertEqual(want_hparam,
                               materialized_report.hparams[hparam_key])

          self.assertSetEqual(
              set(want_materialized_report.attributes.keys()),
              set(materialized_report.attributes.keys()))
          for attribute_key, want_attribute in (
              want_materialized_report.attributes.items()):
            if isinstance(want_attribute, float):
              self.assertAllClose(
                  want_attribute,
                  decode(materialized_report.attributes[attribute_key]))
            else:
              self.assertEqual(
                  want_attribute,
                  decode(materialized_report.attributes[attribute_key]))

          self.assertSetEqual(
              set(want_materialized_report.metrics.keys()),
              set(materialized_report.metrics.keys()))
          for metric_key, want_metric in (
              want_materialized_report.metrics.items()):
            if isinstance(want_metric, float):
              self.assertAllClose(
                  want_metric, decode(materialized_report.metrics[metric_key]))
            else:
              self.assertEqual(want_metric,
                               decode(materialized_report.metrics[metric_key]))
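The expected metric values above (e.g. "moo": 2.) follow the standard TF1 streaming-metric contract: each report metric is a (value_tensor, update_op) pair, and materialization runs the update op for the given number of steps before reading the value. A minimal hedged sketch of that idea, not the actual ReportMaterializer implementation:

def materialize_metric(sess, metric, steps):
    # `metric` is a (value_tensor, update_op) pair, e.g. from tf_compat.v1.metrics.mean.
    value_tensor, update_op = metric
    for _ in range(steps):
        sess.run(update_op)  # accumulate the streaming metric state
    return sess.run(value_tensor)

# With tf_compat.v1.metrics.mean over labels [[1.], [2.], [3.]], this evaluates
# to 2.0, matching the expected "moo" value in the cases above.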
Example #24
0
def eval_op_fn(loss):
    del loss
    return tf.no_op(), []
Example #25
0
    def build_subnetwork(self,
                         features,
                         labels,
                         logits_dimension,
                         training,
                         iteration_step,
                         summary,
                         previous_ensemble,
                         config=None):
        # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
        mode = tf.estimator.ModeKeys.PREDICT
        if training:
            mode = tf.estimator.ModeKeys.TRAIN

        # Call via a template so that variables are created once and reused.
        call_model_fn_template = tf.compat.v1.make_template(
            "model_fn", self._call_model_fn)
        subestimator_features, subestimator_labels = features, labels
        local_init_ops = []
        subestimator = self._subestimator(config)
        if training and subestimator.train_input_fn:
            # TODO: Consider tensorflow_estimator/python/estimator/util.py.
            inputs = subestimator.train_input_fn()
            if isinstance(inputs, (tf_compat.DatasetV1, tf_compat.DatasetV2)):
                subestimator_features, subestimator_labels = (
                    tf_compat.make_one_shot_iterator(inputs).get_next())
            else:
                subestimator_features, subestimator_labels = inputs

            # Construct subnetwork graph first because of dependencies on scope.
            _, _, bagging_train_op_spec, sub_local_init_op = call_model_fn_template(
                subestimator, subestimator_features, subestimator_labels, mode,
                summary)
            # Graph for ensemble learning gets model_fn_1 for scope.
            logits, last_layer, _, ensemble_local_init_op = call_model_fn_template(
                subestimator, features, labels, mode, summary)

            if sub_local_init_op:
                local_init_ops.append(sub_local_init_op)
            if ensemble_local_init_op:
                local_init_ops.append(ensemble_local_init_op)

            # Run train op in a hook so that exceptions can be intercepted by the
            # AdaNet framework instead of the Estimator's monitored training session.
            hooks = bagging_train_op_spec.hooks + (_SecondaryTrainOpRunnerHook(
                bagging_train_op_spec.train_op), )
            train_op_spec = subnetwork_lib.TrainOpSpec(
                train_op=tf.no_op(),
                chief_hooks=bagging_train_op_spec.chief_hooks,
                hooks=hooks)
        else:
            logits, last_layer, train_op_spec, local_init_op = call_model_fn_template(
                subestimator, features, labels, mode, summary)
            if local_init_op:
                local_init_ops.append(local_init_op)

        # TODO: Replace with variance complexity measure.
        complexity = tf.constant(0.)
        return subnetwork_lib.Subnetwork(logits=logits,
                                         last_layer=last_layer,
                                         shared={"train_op": train_op_spec},
                                         complexity=complexity,
                                         local_init_ops=local_init_ops)
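The comment above notes that the bagging train op is run inside a hook so that exceptions surface through the AdaNet framework rather than the Estimator's monitored training session, while the returned TrainOpSpec carries only tf.no_op(). A minimal hedged sketch of what such a hook could look like (the real _SecondaryTrainOpRunnerHook is defined elsewhere in AdaNet and may differ):

import tensorflow as tf

class SecondaryTrainOpRunnerHook(tf.estimator.SessionRunHook):
    """Runs a secondary train op on every step of a monitored session."""

    def __init__(self, train_op):
        self._train_op = train_op

    def before_run(self, run_context):
        # Ask the monitored session to run the wrapped op alongside each step.
        return tf.estimator.SessionRunArgs(self._train_op)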
Example #26
0
    def initialize(self):
        if tf.executing_eagerly():
            return tf.no_op()
        else:
            return self._initializers
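In eager mode, variables are initialized as soon as they are created, so returning tf.no_op() keeps the method's return type uniform for graph-mode callers. A hedged sketch of a graph-mode call site (`model` is an illustrative name, not part of the excerpt):

model_init_op = model.initialize()
with tf.compat.v1.Session() as sess:
    sess.run(model_init_op)  # runs the real initializers returned in graph mode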
Example #27
0
def model_fn(features, labels, mode, params, config):
  """Build the model function for use in an estimator.

  Args:
    features: The input features for the estimator.
    labels: The labels, unused here.
    mode: Signifies whether the model is being run for training, evaluation,
      or prediction.
    params: Some hyperparameters as a dictionary.
    config: The RunConfig, unused here.

  Returns:
    EstimatorSpec: A tf.estimator.EstimatorSpec instance.
  """
  del labels, config

  encoder = make_encoder(params["activation"],
                         params["num_topics"],
                         params["layer_sizes"])
  decoder, topics_words = make_decoder(params["num_topics"],
                                       features.shape[1])
  topics_prior = make_prior(params["num_topics"],
                            params["prior_initial_value"])

  alpha = topics_prior.concentration

  topics_posterior = encoder(features)
  topics = topics_posterior.sample(seed=234)
  random_reconstruction = decoder(topics)

  reconstruction = random_reconstruction.log_prob(features)
  tf1.summary.scalar("reconstruction", tf.reduce_mean(reconstruction))

  # Compute the KL-divergence between two Dirichlets analytically.
  # The sampled KL does not work well for "sparse" distributions
  # (see Appendix D of [2]).
  kl = tfd.kl_divergence(topics_posterior, topics_prior)
  tf1.summary.scalar("kl", tf.reduce_mean(kl))

  # Ensure that the KL is non-negative (up to a very small slack).
  # Negative KL can happen due to numerical instability.
  with tf.control_dependencies(
      [tf.debugging.assert_greater(kl, -1e-3, message="kl")]):
    kl = tf.identity(kl)

  elbo = reconstruction - kl
  avg_elbo = tf.reduce_mean(elbo)
  tf1.summary.scalar("elbo", avg_elbo)
  loss = -avg_elbo

  # Perform variational inference by minimizing the -ELBO.
  global_step = tf1.train.get_or_create_global_step()
  optimizer = tf1.train.AdamOptimizer(params["learning_rate"])

  # This implements the "burn-in" for prior parameters (see Appendix D of [2]).
  # For the first prior_burn_in_steps steps they are fixed, and then trained
  # jointly with the other parameters.
  grads_and_vars = optimizer.compute_gradients(loss)
  grads_and_vars_except_prior = [
      x for x in grads_and_vars if x[1] not in topics_prior.variables]

  def train_op_except_prior():
    return optimizer.apply_gradients(
        grads_and_vars_except_prior,
        global_step=global_step)

  def train_op_all():
    return optimizer.apply_gradients(
        grads_and_vars,
        global_step=global_step)

  train_op = tf.cond(
      pred=global_step < params["prior_burn_in_steps"],
      true_fn=train_op_except_prior,
      false_fn=train_op_all)

  # The perplexity is the exponential of the average negative ELBO per word.
  words_per_document = tf.reduce_sum(features, axis=1)
  log_perplexity = -elbo / words_per_document
  tf1.summary.scalar("perplexity", tf.exp(tf.reduce_mean(log_perplexity)))
  (log_perplexity_tensor,
   log_perplexity_update) = tf1.metrics.mean(log_perplexity)
  perplexity_tensor = tf.exp(log_perplexity_tensor)

  # Obtain the topics summary. Implemented as a py_func for simplicity.
  topics = tf1.py_func(
      functools.partial(get_topics_strings, vocabulary=params["vocabulary"]),
      [topics_words, alpha],
      tf.string,
      stateful=False)
  tf1.summary.text("topics", topics)

  return tf1.estimator.EstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      eval_metric_ops={
          "elbo": tf1.metrics.mean(elbo),
          "reconstruction": tf1.metrics.mean(reconstruction),
          "kl": tf1.metrics.mean(kl),
          "perplexity": (perplexity_tensor, log_perplexity_update),
          "topics": (topics, tf.no_op()),
      },
  )
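Each eval_metric_ops entry is a (value_tensor, update_op) pair; pairing a tensor with tf.no_op(), as done for "topics" above, reports a value that needs no accumulation at evaluation time. A stand-alone sketch of the same pattern (the string is illustrative only):

summary_value = tf.constant("topic 0: word_a word_b word_c")
eval_metric_ops = {"topics": (summary_value, tf.no_op())}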
Example #28
0
    def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                                  iteration_step, summary, previous_ensemble):
        return tf.no_op()
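Returning tf.no_op() here means the subnetwork's variables are left untrained. For contrast, a trainable builder typically returns a real train op from this method; the following is a minimal hedged sketch assuming a plain TF1 optimizer (the optimizer choice and learning rate are illustrative, not taken from the example above):

    def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                                  iteration_step, summary, previous_ensemble):
        # Illustrative optimizer; any TF1 optimizer with minimize() would do.
        optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
        return optimizer.minimize(loss=loss, var_list=var_list)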
Example #29
0
class ReportTest(parameterized.TestCase, tf.test.TestCase):

  # pylint: disable=g-long-lambda
  @parameterized.named_parameters(
      {
          "testcase_name": "empty",
          "hparams": {},
          "attributes": lambda: {},
          "metrics": lambda: {},
      }, {
          "testcase_name": "non_empty",
          "hparams": {
              "hoo": 1
          },
          "attributes": lambda: {
              "aoo": tf.constant(1)
          },
          "metrics": lambda: {
              "moo": (tf.constant(1), tf.constant(1))
          },
      }, {
          "testcase_name": "non_tensor_update_op",
          "hparams": {
              "hoo": 1
          },
          "attributes": lambda: {
              "aoo": tf.constant(1)
          },
          "metrics": lambda: {
              "moo": (tf.constant(1), tf.no_op())
          },
      })
  # pylint: enable=g-long-lambda
  @test_util.run_in_graph_and_eager_modes
  def test_new(self, hparams, attributes, metrics):
    with context.graph_mode():
      _ = tf.constant(0)  # Just to have a non-empty graph.
      report = Report(
          hparams=hparams, attributes=attributes(), metrics=metrics())
      self.assertEqual(hparams, report.hparams)
      self.assertEqual(
          self.evaluate(attributes()), self.evaluate(report.attributes))
      self.assertEqual(self.evaluate(metrics()), self.evaluate(report.metrics))

  @test_util.run_in_graph_and_eager_modes
  def test_drop_non_scalar_metric(self):
    """Tests b/118632346."""

    hparams = {"hoo": 1}
    attributes = {"aoo": tf.constant(1)}
    metrics = {
        "moo1": (tf.constant(1), tf.constant(1)),
        "moo2": (tf.constant([1, 1]), tf.constant([1, 1])),
    }
    want_metrics = metrics.copy()
    del want_metrics["moo2"]
    with self.test_session():
      report = Report(hparams=hparams, attributes=attributes, metrics=metrics)
      self.assertEqual(hparams, report.hparams)
      self.assertEqual(attributes, report.attributes)
      self.assertEqual(want_metrics, report.metrics)

  @parameterized.named_parameters(
      {
          "testcase_name": "tensor_hparams",
          "hparams": {
              "hoo": tf.constant(1)
          },
          "attributes": {},
          "metrics": {},
      }, {
          "testcase_name": "non_tensor_attributes",
          "hparams": {},
          "attributes": {
              "aoo": 1,
          },
          "metrics": {},
      }, {
          "testcase_name": "non_tuple_metrics",
          "hparams": {},
          "attributes": {},
          "metrics": {
              "moo": tf.constant(1)
          },
      }, {
          "testcase_name": "one_item_tuple_metrics",
          "hparams": {},
          "attributes": {},
          "metrics": {
              "moo": (tf.constant(1),)
          },
      })
  @test_util.run_in_graph_and_eager_modes
  def test_new_errors(self, hparams, attributes, metrics):
    with self.assertRaises(ValueError):
      Report(hparams=hparams, attributes=attributes, metrics=metrics)
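For reference, a Report that passes these checks pairs plain Python hparams with Tensor attributes and two-element (value, update_op) metric tuples. A minimal hedged sketch inferred from the error cases above (the names are illustrative):

report = Report(
    hparams={"learning_rate": 1e-3},
    attributes={"num_parameters": tf.constant(1000)},
    metrics={"mean_label": (tf.constant(2.), tf.no_op())},
)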