def _make_metrics(self,
                  metric_fn,
                  mode=tf.estimator.ModeKeys.EVAL,
                  multi_head=False,
                  sess=None):
  with context.graph_mode():
    if multi_head:
      head = multi_head_lib.MultiHead(heads=[
          binary_class_head.BinaryClassHead(
              name="head1", loss_reduction=tf_compat.SUM),
          binary_class_head.BinaryClassHead(
              name="head2", loss_reduction=tf_compat.SUM)
      ])
      labels = {"head1": tf.constant([0, 1]), "head2": tf.constant([0, 1])}
    else:
      head = binary_class_head.BinaryClassHead(loss_reduction=tf_compat.SUM)
      labels = tf.constant([0, 1])
    features = {"x": tf.constant([[1.], [2.]])}
    builder = _EnsembleBuilder(head, metric_fn=metric_fn)
    subnetwork_manager = _SubnetworkManager(head, metric_fn=metric_fn)
    subnetwork_builder = _Builder(
        lambda unused0, unused1: tf.no_op(),
        lambda unused0, unused1: tf.no_op(),
        use_logits_last_layer=True)

    subnetwork_spec = subnetwork_manager.build_subnetwork_spec(
        name="test",
        subnetwork_builder=subnetwork_builder,
        summary=_FakeSummary(),
        features=features,
        mode=mode,
        labels=labels)
    ensemble_spec = builder.build_ensemble_spec(
        name="test",
        candidate=EnsembleCandidate("foo", [subnetwork_builder], None),
        ensembler=ComplexityRegularizedEnsembler(
            mixture_weight_type=MixtureWeightType.SCALAR),
        subnetwork_specs=[subnetwork_spec],
        summary=_FakeSummary(),
        features=features,
        iteration_number=0,
        labels=labels,
        mode=mode)
    subnetwork_metric_ops = call_eval_metrics(subnetwork_spec.eval_metrics)
    ensemble_metric_ops = call_eval_metrics(ensemble_spec.eval_metrics)
    evaluate = self.evaluate
    if sess is not None:
      evaluate = sess.run
    evaluate((tf_compat.v1.global_variables_initializer(),
              tf_compat.v1.local_variables_initializer()))
    evaluate((subnetwork_metric_ops, ensemble_metric_ops))
    # Return the idempotent tensor part of the (tensor, op) metrics tuple.
    return {
        k: evaluate(subnetwork_metric_ops[k][0])
        for k in subnetwork_metric_ops
    }, {k: evaluate(ensemble_metric_ops[k][0]) for k in ensemble_metric_ops}
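# For reference, a minimal standalone sketch (not part of the test above) of
# the TF1 `(value_tensor, update_op)` metric contract that `_make_metrics`
# unpacks: `update_op` advances the streaming state, while the value tensor is
# idempotent and can be fetched repeatedly.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
mean, update_op = tf.compat.v1.metrics.mean([1., 2., 3.])
with tf.compat.v1.Session() as sess:
  sess.run(tf.compat.v1.local_variables_initializer())
  sess.run(update_op)    # Accumulates the streaming state once.
  print(sess.run(mean))  # Idempotent value tensor: 2.0.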
def test_multi_head_provided(self):
  """Tests error raised when a multi-head is provided."""
  with self.assertRaisesRegexp(
      ValueError,
      '`MultiHead` is not supported with `SequentialHeadWrapper`.'):
    _ = seq_head_lib.SequentialHeadWrapper(
        multi_head.MultiHead(
            [binary_head_lib.BinaryClassHead(name='test-head')]))
def test_should_error_out_for_not_recognized_args(self):
  head = binary_class_head.BinaryClassHead(loss_reduction=tf_compat.SUM)

  def metric_fn(features, not_recognized):
    _, _ = features, not_recognized
    return {}

  with self.assertRaisesRegexp(ValueError, "not_recognized"):
    _EnsembleBuilder(head, metric_fn=metric_fn)
def test_head_properties(self):
  """Tests that the head's properties are correctly implemented."""
  static_head = binary_head_lib.BinaryClassHead(
      loss_reduction=losses_utils.ReductionV2.SUM, name='a_static_head')
  head = seq_head_lib.SequentialHeadWrapper(static_head,
                                            'a_sequence_mask_col')
  self.assertEqual(head.name, 'a_static_head_sequential')
  self.assertEqual(head.logits_dimension, 1)
  self.assertEqual(head.loss_reduction, losses_utils.ReductionV2.SUM)
  self.assertEqual(head.input_sequence_mask_key, 'a_sequence_mask_col')
  self.assertEqual(head.static_head.name, 'a_static_head')
def test_optimizer_v2_variable_name(self):
  head = head_lib.BinaryClassHead()
  logits = np.array(((45,), (-41,),), dtype=np.float32)
  labels = np.array(((1,), (1,),), dtype=np.float64)
  features = {'x': np.array(((42,),), dtype=np.float32)}

  class _Optimizer(optimizer_v2.OptimizerV2):

    def __init__(self, name, **kwargs):
      super(_Optimizer, self).__init__(name, **kwargs)

    def get_updates(self, loss, params):
      del params
      variable = tf.Variable(
          name='my_variable', dtype=tf.dtypes.float32, initial_value=0.)
      self._weights.append(variable)
      return [variable]

    def get_config(self):
      config = super(_Optimizer, self).get_config()
      return config

  # Create estimator spec.
  optimizer = _Optimizer('my_optimizer')
  old_opt_variable_name_prefix = 'training/' + optimizer.__class__.__name__
  spec = head.create_estimator_spec(
      features=features,
      mode=ModeKeys.TRAIN,
      logits=logits,
      labels=labels,
      optimizer=optimizer,
      trainable_variables=[
          tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
      ])
  with self.cached_session() as sess:
    test_lib._initialize_variables(self, spec.scaffold)
    optimizer_variables = optimizer.variables()
    var_values = sess.run(optimizer_variables)
    self.assertEqual(0., var_values[0])
    for var in optimizer_variables:
      self.assertNotIn(old_opt_variable_name_prefix, var.name)
def test_metrics(self):
  """Tests the `metrics` method.

  Tests that:
  - Returned metrics match the returned metrics of the static head.
  - `regularization_losses` argument is properly passed to the static head's
    method.
  """
  head = seq_head_lib.SequentialHeadWrapper(
      binary_head_lib.BinaryClassHead(), 'mask')
  metrics = head.metrics(regularization_losses=2.5)
  keys = metric_keys.MetricKeys
  self.assertIn(keys.ACCURACY, metrics)
  self.assertIn(keys.LOSS_REGULARIZATION, metrics)
def test_predictions(self):
  """Tests predictions output.

  Use `predictions` method in eager execution, else `create_estimator_spec`
  in PREDICT mode.

  logits = [[0.3, -0.4], [0.2, 0.2]]
  logistics = 1 / (1 + exp(-logits)) = [[0.57, 0.40], [0.55, 0.55]]
  """
  head = seq_head_lib.SequentialHeadWrapper(
      binary_head_lib.BinaryClassHead(), 'sequence_mask')
  logits = [[[0.3], [-0.4]], [[0.2], [0.2]]]
  expected_logistics = [[[0.574443], [0.401312]], [[0.549834], [0.549834]]]
  features = {
      'sequence_mask': ops.convert_to_tensor(np.array([[1, 1], [1, 0]]))
  }
  keys = prediction_keys.PredictionKeys
  if tf.executing_eagerly():
    predictions = head.predictions(
        logits=logits, keys=[keys.LOGITS, keys.LOGISTIC])
    self.assertItemsEqual(predictions.keys(), [keys.LOGITS, keys.LOGISTIC])
    self.assertAllClose(logits, predictions[keys.LOGITS])
    self.assertAllClose(expected_logistics, predictions[keys.LOGISTIC])
    return

  spec = head.create_estimator_spec(
      features=features,
      mode=ModeKeys.PREDICT,
      logits=logits,
      trainable_variables=[
          tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
      ])
  self.assertIn('sequence_mask', spec.predictions)
  with self.cached_session() as sess:
    self.assertAllEqual(
        sess.run(spec.predictions['sequence_mask']),
        features['sequence_mask'])
    self.assertAllClose(logits, sess.run(spec.predictions[keys.LOGITS]))
    self.assertAllClose(expected_logistics,
                        sess.run(spec.predictions[keys.LOGISTIC]))
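# A quick NumPy-only check of the expected logistic values in the docstring
# above, using logistic(x) = 1 / (1 + exp(-x)); no TensorFlow required.
import numpy as np

logits = np.array([[[0.3], [-0.4]], [[0.2], [0.2]]])
logistics = 1. / (1. + np.exp(-logits))
# -> [[[0.574443], [0.401312]], [[0.549834], [0.549834]]], matching
# `expected_logistics` in the test.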
def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary,
                               loss_reduction):
  """Creates either a binary or a multi-class head.

  Args:
    n_classes: Number of label classes.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column that
      represents weights. It is used to down weight or boost examples during
      training. It will be multiplied by the loss of the example. If it is a
      string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is applied
      on it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and have any value in
      `label_vocabulary`. If it is not given, labels must already be encoded
      as an integer or float within [0, 1] for `n_classes=2`, or as integer
      values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
      raised if the vocabulary is not provided and the labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how
      to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Returns:
    A `Head` instance.
  """
  if n_classes == 2:
    head = binary_class_head.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_class_head.MultiClassHead(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  return head
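# A hedged usage sketch of the helper above. `losses_utils.ReductionV2` is
# assumed to be importable here (it appears elsewhere in this codebase); the
# argument values simply mirror the documented defaults.
binary_head = binary_or_multi_class_head(
    n_classes=2,
    weight_column=None,
    label_vocabulary=None,
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
# -> a `BinaryClassHead` with `logits_dimension == 1`.

multiclass_head = binary_or_multi_class_head(
    n_classes=5,
    weight_column=None,
    label_vocabulary=None,
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
# -> a `MultiClassHead` over 5 classes.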
def test_head_with_invalid_optimizer(self):
  head = head_lib.BinaryClassHead()
  logits = np.array(((45,), (-41,),), dtype=np.float32)
  labels = np.array(((1,), (1,),), dtype=np.float64)
  features = {'x': np.array(((42,),), dtype=np.float32)}
  with self.assertRaisesRegexp(
      ValueError,
      r'The given optimizer is not a tf.keras.optimizers.Optimizer instance'):
    # Create estimator spec.
    head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=adam_v1.AdamOptimizer())
def test_init_errors(self, max_steps=None):
  head = binary_class_head.BinaryClassHead(loss_reduction=tf_compat.SUM)
  with self.test_session():
    with self.assertRaises(ValueError):
      _SubnetworkManager(head, max_steps=max_steps)
def __init__(self,
             subnetwork_generator,
             max_iteration_steps,
             logits_dimension=1,
             ensemblers=None,
             ensemble_strategies=None,
             evaluator=None,
             adanet_loss_decay=.9,
             filepath=None):
  """Initializes an `adanet.keras.Model`.

  Args:
    subnetwork_generator: The :class:`adanet.subnetwork.Generator` which
      defines the candidate subnetworks to train and evaluate at every AdaNet
      iteration.
    max_iteration_steps: Total number of steps for which to train candidates
      per iteration. If :class:`OutOfRange` or :class:`StopIteration` occurs
      in the middle, training stops before `max_iteration_steps` steps. When
      :code:`None`, it will train the current iteration forever.
    logits_dimension: The dimension of the final layer of any subnetworks.
    ensemblers: An iterable of :class:`adanet.ensemble.Ensembler` objects
      that define how to ensemble a group of subnetworks. If there are
      multiple, each should have a different `name` property.
    ensemble_strategies: An iterable of :class:`adanet.ensemble.Strategy`
      objects that define the candidate ensembles of subnetworks to explore
      at each iteration.
    evaluator: An :class:`adanet.Evaluator` for candidate selection after all
      subnetworks are done training. When :code:`None`, candidate selection
      uses a moving average of their :class:`adanet.Ensemble` AdaNet loss
      during training instead. In order to use the *AdaNet algorithm* as
      described in [Cortes et al., '17], the given :class:`adanet.Evaluator`
      must be created with the same dataset partition used during training.
      Otherwise, this framework will perform *AdaNet.HoldOut*, which uses a
      holdout set for candidate selection, but does not benefit from learning
      guarantees.
    adanet_loss_decay: Float decay for the exponential moving average of the
      AdaNet objective throughout training. This moving average is a
      data-driven way of tracking the best candidate with only the training
      set.
    filepath: Directory in which to save the model parameters, graph, etc.
      This can also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
  """
  logging.warning("The AdaNet Keras API is currently experimental.")

  self._subnetwork_generator = subnetwork_generator
  self._max_iteration_steps = max_iteration_steps
  self._logits_dimension = logits_dimension
  self._ensemblers = ensemblers
  self._ensemble_strategies = ensemble_strategies
  self._evaluator = evaluator
  self._adanet_loss_decay = adanet_loss_decay
  self._filepath = filepath
  self._model = None
  # Use lambdas to defer initialization of Head.
  self._loss_head_map = {
      "binary_crossentropy":
          lambda: binary_class_head.BinaryClassHead(),  # pylint: disable=unnecessary-lambda
      "mse":
          lambda: regression_head.RegressionHead(self._logits_dimension),
      "mean_squared_error":
          lambda: regression_head.RegressionHead(self._logits_dimension),
      "sparse_categorical_crossentropy":
          lambda: multi_class_head.MultiClassHead(self._logits_dimension),
  }
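# A minimal sketch of how the deferred `_loss_head_map` above would be
# resolved; `_create_head` is a hypothetical helper, not part of the source.
# Each lambda is only invoked once a Keras-style loss name is known, so head
# construction is deferred until that point.
def _create_head(self, loss):
  if loss not in self._loss_head_map:
    raise ValueError("Unsupported loss: {}".format(loss))
  return self._loss_head_map[loss]()  # e.g. a RegressionHead for "mse".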
def test_build_ensemble_spec(
    self,
    want_logits,
    want_loss=None,
    want_adanet_loss=None,
    want_ensemble_trainable_vars=None,
    adanet_lambda=0.,
    adanet_beta=0.,
    ensemble_spec_fn=lambda: None,
    use_bias=False,
    use_logits_last_layer=False,
    mixture_weight_type=MixtureWeightType.MATRIX,
    mixture_weight_initializer=tf_compat.v1.zeros_initializer(),
    warm_start_mixture_weights=True,
    subnetwork_builder_class=_Builder,
    mode=tf.estimator.ModeKeys.TRAIN,
    multi_head=False,
    want_subnetwork_trainable_vars=2):
  seed = 64
  if multi_head:
    head = multi_head_lib.MultiHead(heads=[
        binary_class_head.BinaryClassHead(
            name="head1", loss_reduction=tf_compat.SUM),
        binary_class_head.BinaryClassHead(
            name="head2", loss_reduction=tf_compat.SUM)
    ])
  else:
    head = binary_class_head.BinaryClassHead(loss_reduction=tf_compat.SUM)
  builder = _EnsembleBuilder(head=head)

  def _subnetwork_train_op_fn(loss, var_list):
    self.assertLen(var_list, want_subnetwork_trainable_vars)
    self.assertEqual(
        var_list,
        tf_compat.v1.get_collection(
            tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES))
    # Subnetworks get iteration steps instead of global steps.
    self.assertEqual("subnetwork_test/iteration_step",
                     tf_compat.v1.train.get_global_step().op.name)
    # Subnetworks get scoped summaries.
    self.assertEqual("fake_scalar", tf_compat.v1.summary.scalar("scalar", 1.))
    self.assertEqual("fake_image", tf_compat.v1.summary.image("image", 1.))
    self.assertEqual("fake_histogram",
                     tf_compat.v1.summary.histogram("histogram", 1.))
    self.assertEqual("fake_audio",
                     tf_compat.v1.summary.audio("audio", 1., 1.))
    optimizer = tf_compat.v1.train.GradientDescentOptimizer(learning_rate=.1)
    return optimizer.minimize(loss, var_list=var_list)

  def _mixture_weights_train_op_fn(loss, var_list):
    self.assertLen(var_list, want_ensemble_trainable_vars)
    self.assertEqual(
        var_list,
        tf_compat.v1.get_collection(
            tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES))
    # Mixture weights get iteration steps instead of global steps.
    self.assertEqual("ensemble_test/iteration_step",
                     tf_compat.v1.train.get_global_step().op.name)
    # Mixture weights get scoped summaries.
    self.assertEqual("fake_scalar", tf_compat.v1.summary.scalar("scalar", 1.))
    self.assertEqual("fake_image", tf_compat.v1.summary.image("image", 1.))
    self.assertEqual("fake_histogram",
                     tf_compat.v1.summary.histogram("histogram", 1.))
    self.assertEqual("fake_audio",
                     tf_compat.v1.summary.audio("audio", 1., 1.))
    optimizer = tf_compat.v1.train.GradientDescentOptimizer(learning_rate=.1)
    return optimizer.minimize(loss, var_list=var_list)

  previous_ensemble = None
  previous_ensemble_spec = ensemble_spec_fn()
  if previous_ensemble_spec:
    previous_ensemble = previous_ensemble_spec.ensemble
  subnetwork_manager = _SubnetworkManager(head)
  subnetwork_builder = subnetwork_builder_class(
      _subnetwork_train_op_fn,
      _mixture_weights_train_op_fn,
      use_logits_last_layer,
      seed,
      multi_head=multi_head)

  with tf.Graph().as_default() as g:
    # A trainable variable to later verify that creating models does not
    # affect the global variables collection.
    _ = tf_compat.v1.get_variable("some_var", shape=0, trainable=True)

    features = {"x": tf.constant([[1.], [2.]])}
    if multi_head:
      labels = {"head1": tf.constant([0, 1]), "head2": tf.constant([0, 1])}
    else:
      labels = tf.constant([0, 1])

    subnetwork_spec = subnetwork_manager.build_subnetwork_spec(
        name="test",
        subnetwork_builder=subnetwork_builder,
        iteration_step=tf_compat.v1.train.get_or_create_global_step(),
        summary=_FakeSummary(),
        features=features,
        mode=mode,
        labels=labels,
        previous_ensemble=previous_ensemble)
    ensemble_spec = builder.build_ensemble_spec(
        # Note: when ensemble_spec is not None and warm_start_mixture_weights
        # is True, we need to make sure that the bias and mixture weights are
        # already saved to the checkpoint_dir.
        name="test",
        previous_ensemble_spec=previous_ensemble_spec,
        candidate=EnsembleCandidate("foo", [subnetwork_builder], None),
        ensembler=ComplexityRegularizedEnsembler(
            mixture_weight_type=mixture_weight_type,
            mixture_weight_initializer=mixture_weight_initializer,
            warm_start_mixture_weights=warm_start_mixture_weights,
            model_dir=self.test_subdirectory,
            adanet_lambda=adanet_lambda,
            adanet_beta=adanet_beta,
            use_bias=use_bias),
        subnetwork_specs=[subnetwork_spec],
        summary=_FakeSummary(),
        features=features,
        iteration_number=1,
        iteration_step=tf_compat.v1.train.get_or_create_global_step(),
        labels=labels,
        mode=mode)

    with tf_compat.v1.Session(graph=g).as_default() as sess:
      sess.run(tf_compat.v1.global_variables_initializer())

      # Equals the number of subnetwork and ensemble trainable variables,
      # plus the one 'some_var' created earlier.
      self.assertLen(
          tf_compat.v1.trainable_variables(),
          want_subnetwork_trainable_vars + want_ensemble_trainable_vars + 1)

      # Get the real global step outside a subnetwork's context.
      self.assertEqual("global_step",
                       tf_compat.v1.train.get_global_step().op.name)
      self.assertEqual("global_step", train.get_global_step().op.name)
      self.assertEqual("global_step", tf_v1.train.get_global_step().op.name)
      self.assertEqual("global_step",
                       training_util.get_global_step().op.name)
      self.assertEqual("global_step",
                       tf_compat.v1.train.get_or_create_global_step().op.name)
      self.assertEqual("global_step",
                       train.get_or_create_global_step().op.name)
      self.assertEqual("global_step",
                       tf_v1.train.get_or_create_global_step().op.name)
      self.assertEqual("global_step",
                       training_util.get_or_create_global_step().op.name)

      # Get global tf.summary outside a subnetwork's context.
      self.assertNotEqual("fake_scalar",
                          tf_compat.v1.summary.scalar("scalar", 1.))
      self.assertNotEqual("fake_image",
                          tf_compat.v1.summary.image("image", 1.))
      self.assertNotEqual("fake_histogram",
                          tf_compat.v1.summary.histogram("histogram", 1.))
      self.assertNotEqual("fake_audio",
                          tf_compat.v1.summary.audio("audio", 1., 1.))

      if mode == tf.estimator.ModeKeys.PREDICT:
        self.assertAllClose(
            want_logits, sess.run(ensemble_spec.ensemble.logits), atol=1e-3)
        self.assertIsNone(ensemble_spec.loss)
        self.assertIsNone(ensemble_spec.adanet_loss)
        self.assertIsNone(ensemble_spec.train_op)
        self.assertIsNotNone(ensemble_spec.export_outputs)
        return

      # Verify that the train op works: the loss observed before training
      # should be greater than the loss after a few train steps.
      loss = sess.run(ensemble_spec.loss)
      train_op = tf.group(subnetwork_spec.train_op.train_op,
                          ensemble_spec.train_op.train_op)
      for _ in range(3):
        sess.run(train_op)
      self.assertGreater(loss, sess.run(ensemble_spec.loss))
      self.assertAllClose(
          want_logits, sess.run(ensemble_spec.ensemble.logits), atol=1e-3)

      # Bias should learn a non-zero value when used.
      bias = sess.run(ensemble_spec.ensemble.bias)
      if isinstance(bias, dict):
        bias = sum(abs(b) for b in bias.values())
      if use_bias:
        self.assertNotEqual(0., bias)
      else:
        self.assertAlmostEqual(0., bias)
      self.assertAlmostEqual(
          want_loss, sess.run(ensemble_spec.loss), places=3)
      self.assertAlmostEqual(
          want_adanet_loss, sess.run(ensemble_spec.adanet_loss), places=3)
def test_build_ensemble_spec(
    self,
    want_logits,
    want_loss=None,
    want_adanet_loss=None,
    want_ensemble_trainable_vars=None,
    adanet_lambda=0.,
    adanet_beta=0.,
    ensemble_spec_fn=lambda: None,
    use_bias=False,
    use_logits_last_layer=False,
    mixture_weight_type=MixtureWeightType.MATRIX,
    mixture_weight_initializer=tf_compat.v1.zeros_initializer(),
    warm_start_mixture_weights=True,
    subnetwork_builder_class=_Builder,
    mode=tf.estimator.ModeKeys.TRAIN,
    multi_head=False,
    want_subnetwork_trainable_vars=2,
    ensembler_class=ComplexityRegularizedEnsembler,
    my_ensemble_index=None,
    want_replay_indices=None,
    want_predictions=None,
    export_subnetworks=False,
    previous_ensemble_spec=None,
    previous_iteration_checkpoint=None):
  seed = 64
  if multi_head:
    head = multi_head_lib.MultiHead(heads=[
        binary_class_head.BinaryClassHead(
            name="head1", loss_reduction=tf_compat.SUM),
        binary_class_head.BinaryClassHead(
            name="head2", loss_reduction=tf_compat.SUM)
    ])
  else:
    head = binary_class_head.BinaryClassHead(loss_reduction=tf_compat.SUM)
  builder = _EnsembleBuilder(
      head=head,
      export_subnetwork_logits=export_subnetworks,
      export_subnetwork_last_layer=export_subnetworks)

  def _subnetwork_train_op_fn(loss, var_list):
    self.assertLen(var_list, want_subnetwork_trainable_vars)
    self.assertEqual(
        var_list,
        tf_compat.v1.get_collection(
            tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES))
    # Subnetworks get iteration steps instead of global steps.
    self.assertEqual("subnetwork_test/iteration_step",
                     tf_compat.v1.train.get_global_step().op.name)
    # Subnetworks get scoped summaries.
    self.assertEqual("fake_scalar", tf_compat.v1.summary.scalar("scalar", 1.))
    self.assertEqual("fake_image", tf_compat.v1.summary.image("image", 1.))
    self.assertEqual("fake_histogram",
                     tf_compat.v1.summary.histogram("histogram", 1.))
    self.assertEqual("fake_audio",
                     tf_compat.v1.summary.audio("audio", 1., 1.))
    optimizer = tf_compat.v1.train.GradientDescentOptimizer(learning_rate=.1)
    return optimizer.minimize(loss, var_list=var_list)

  def _mixture_weights_train_op_fn(loss, var_list):
    self.assertLen(var_list, want_ensemble_trainable_vars)
    self.assertEqual(
        var_list,
        tf_compat.v1.get_collection(
            tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES))
    # Mixture weights get iteration steps instead of global steps.
    self.assertEqual("ensemble_test/iteration_step",
                     tf_compat.v1.train.get_global_step().op.name)
    # Mixture weights get scoped summaries.
    self.assertEqual("fake_scalar", tf_compat.v1.summary.scalar("scalar", 1.))
    self.assertEqual("fake_image", tf_compat.v1.summary.image("image", 1.))
    self.assertEqual("fake_histogram",
                     tf_compat.v1.summary.histogram("histogram", 1.))
    self.assertEqual("fake_audio",
                     tf_compat.v1.summary.audio("audio", 1., 1.))
    if not var_list:
      return tf.no_op()
    optimizer = tf_compat.v1.train.GradientDescentOptimizer(learning_rate=.1)
    return optimizer.minimize(loss, var_list=var_list)

  previous_ensemble = None
  previous_ensemble_spec = ensemble_spec_fn()
  if previous_ensemble_spec:
    previous_ensemble = previous_ensemble_spec.ensemble
  subnetwork_manager = _SubnetworkManager(head)
  subnetwork_builder = subnetwork_builder_class(
      _subnetwork_train_op_fn,
      _mixture_weights_train_op_fn,
      use_logits_last_layer,
      seed,
      multi_head=multi_head)

  with tf.Graph().as_default() as g:
    tf_compat.v1.train.get_or_create_global_step()
    # A trainable variable to later verify that creating models does not
    # affect the global variables collection.
    _ = tf_compat.v1.get_variable("some_var", shape=0, trainable=True)

    features = {"x": tf.constant([[1.], [2.]])}
    if multi_head:
      labels = {"head1": tf.constant([0, 1]), "head2": tf.constant([0, 1])}
    else:
      labels = tf.constant([0, 1])

    session_config = tf.compat.v1.ConfigProto(
        gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))

    subnetwork_spec = subnetwork_manager.build_subnetwork_spec(
        name="test",
        subnetwork_builder=subnetwork_builder,
        summary=_FakeSummary(),
        features=features,
        mode=mode,
        labels=labels,
        previous_ensemble=previous_ensemble)

    ensembler_kwargs = {}
    if ensembler_class is ComplexityRegularizedEnsembler:
      ensembler_kwargs.update({
          "mixture_weight_type": mixture_weight_type,
          "mixture_weight_initializer": mixture_weight_initializer,
          "warm_start_mixture_weights": warm_start_mixture_weights,
          "model_dir": self.test_subdirectory,
          "adanet_lambda": adanet_lambda,
          "adanet_beta": adanet_beta,
          "use_bias": use_bias
      })
    if ensembler_class is MeanEnsembler:
      ensembler_kwargs.update({"add_mean_last_layer_predictions": True})

    ensemble_spec = builder.build_ensemble_spec(
        # Note: when ensemble_spec is not None and warm_start_mixture_weights
        # is True, we need to make sure that the bias and mixture weights are
        # already saved to the checkpoint_dir.
        name="test",
        previous_ensemble_spec=previous_ensemble_spec,
        candidate=EnsembleCandidate("foo", [subnetwork_builder], None),
        ensembler=ensembler_class(**ensembler_kwargs),
        subnetwork_specs=[subnetwork_spec],
        summary=_FakeSummary(),
        features=features,
        iteration_number=1,
        labels=labels,
        my_ensemble_index=my_ensemble_index,
        mode=mode,
        previous_iteration_checkpoint=previous_iteration_checkpoint)

    if want_replay_indices:
      self.assertAllEqual(want_replay_indices,
                          ensemble_spec.architecture.replay_indices)

    with tf_compat.v1.Session(
        graph=g, config=session_config).as_default() as sess:
      sess.run(tf_compat.v1.global_variables_initializer())

      # Equals the number of subnetwork and ensemble trainable variables,
      # plus the one 'some_var' created earlier.
      self.assertLen(
          tf_compat.v1.trainable_variables(),
          want_subnetwork_trainable_vars + want_ensemble_trainable_vars + 1)

      # Get the real global step outside a subnetwork's context.
      self.assertEqual("global_step",
                       tf_compat.v1.train.get_global_step().op.name)
      self.assertEqual("global_step", train.get_global_step().op.name)
      self.assertEqual("global_step", tf_v1.train.get_global_step().op.name)
      self.assertEqual("global_step",
                       training_util.get_global_step().op.name)
      self.assertEqual("global_step",
                       tf_compat.v1.train.get_or_create_global_step().op.name)
      self.assertEqual("global_step",
                       train.get_or_create_global_step().op.name)
      self.assertEqual("global_step",
                       tf_v1.train.get_or_create_global_step().op.name)
      self.assertEqual("global_step",
                       training_util.get_or_create_global_step().op.name)

      # Get global tf.summary outside a subnetwork's context.
      self.assertNotEqual("fake_scalar",
                          tf_compat.v1.summary.scalar("scalar", 1.))
      self.assertNotEqual("fake_image",
                          tf_compat.v1.summary.image("image", 1.))
      self.assertNotEqual("fake_histogram",
                          tf_compat.v1.summary.histogram("histogram", 1.))
      self.assertNotEqual("fake_audio",
                          tf_compat.v1.summary.audio("audio", 1., 1.))

      if mode == tf.estimator.ModeKeys.PREDICT:
        self.assertAllClose(
            want_logits, sess.run(ensemble_spec.ensemble.logits), atol=1e-3)
        self.assertIsNone(ensemble_spec.loss)
        self.assertIsNone(ensemble_spec.adanet_loss)
        self.assertIsNone(ensemble_spec.train_op)
        self.assertIsNotNone(ensemble_spec.export_outputs)
        if not export_subnetworks:
          return
        if not multi_head:
          subnetwork_logits = sess.run(ensemble_spec.export_outputs[
              _EnsembleBuilder._SUBNETWORK_LOGITS_EXPORT_SIGNATURE].outputs)
          self.assertAllClose(subnetwork_logits["test"],
                              sess.run(subnetwork_spec.subnetwork.logits))
          subnetwork_last_layer = sess.run(ensemble_spec.export_outputs[
              _EnsembleBuilder
              ._SUBNETWORK_LAST_LAYER_EXPORT_SIGNATURE].outputs)
          self.assertAllClose(subnetwork_last_layer["test"],
                              sess.run(subnetwork_spec.subnetwork.last_layer))
        else:
          self.assertIn("subnetwork_logits_head2",
                        ensemble_spec.export_outputs)
          subnetwork_logits_head1 = sess.run(
              ensemble_spec.export_outputs["subnetwork_logits_head1"].outputs)
          self.assertAllClose(
              subnetwork_logits_head1["test"],
              sess.run(subnetwork_spec.subnetwork.logits["head1"]))
          self.assertIn("subnetwork_last_layer_head2",
                        ensemble_spec.export_outputs)
          subnetwork_last_layer_head1 = sess.run(
              ensemble_spec.export_outputs["subnetwork_last_layer_head1"]
              .outputs)
          self.assertAllClose(
              subnetwork_last_layer_head1["test"],
              sess.run(subnetwork_spec.subnetwork.last_layer["head1"]))
        return

      # Verify that the train op works: the loss observed before training
      # should be greater than the loss after a few train steps.
      loss = sess.run(ensemble_spec.loss)
      train_op = tf.group(subnetwork_spec.train_op.train_op,
                          ensemble_spec.train_op.train_op)
      for _ in range(3):
        sess.run(train_op)
      self.assertGreater(loss, sess.run(ensemble_spec.loss))
      self.assertAllClose(
          want_logits, sess.run(ensemble_spec.ensemble.logits), atol=1e-3)

      if ensembler_class is ComplexityRegularizedEnsembler:
        # Bias should learn a non-zero value when used.
        bias = sess.run(ensemble_spec.ensemble.bias)
        if isinstance(bias, dict):
          bias = sum(abs(b) for b in bias.values())
        if use_bias:
          self.assertNotEqual(0., bias)
        else:
          self.assertAlmostEqual(0., bias)

      self.assertAlmostEqual(
          want_loss, sess.run(ensemble_spec.loss), places=3)
      self.assertAlmostEqual(
          want_adanet_loss, sess.run(ensemble_spec.adanet_loss), places=3)

      if want_predictions:
        self.assertAllClose(
            want_predictions,
            sess.run(ensemble_spec.ensemble.predictions),
            atol=1e-3)
def test_metrics_computation(self):
  """Runs metrics computation tests.

  Use `update_metrics` method in eager execution, else
  `create_estimator_spec` in EVAL mode.

  logits = [[-101, 102, -103], [104, _, _]]
  predicted_labels = [[0, 1, 0], [1, _, _]]
  labels = [[1, 1, 1], [1, _, _]]
  weights = [[2, 5, 1], [2, _, _]]
  loss = (101*2 + 103*1) / 10 = 30.5
  accuracy = (0 + 5 + 0 + 2) / (2 + 5 + 1 + 2) = 0.7
  prediction_mean = (0 + 5 + 0 + 2) / (2 + 5 + 1 + 2) = 0.7
  precision = (5 + 2) / (5 + 2) = 1.0
  recall = (5 + 2) / (2 + 5 + 1 + 2) = 0.7
  """
  static_head = binary_head_lib.BinaryClassHead(weight_column='weights')
  head = seq_head_lib.SequentialHeadWrapper(static_head, 'sequence_mask',
                                            'weights')
  features = {
      'sequence_mask': np.array([[1, 1, 1], [1, 0, 0]]),
      'weights': np.array([[2, 5, 1], [2, 100, 100]])
  }
  regularization_losses = [100.]
  logits = _convert_to_tensor([[-101, 102, -103], [104, 100, 100]])
  labels = sparse_tensor.SparseTensor(
      values=[1, 1, 1, 1],
      indices=((0, 0), (0, 1), (0, 2), (1, 0)),
      dense_shape=(2, 3))
  features = _convert_to_tensor(features)

  expected_loss = 30.5
  keys = metric_keys.MetricKeys
  expected_metrics = {
      keys.LOSS_MEAN: expected_loss,
      keys.ACCURACY: 0.7,
      keys.PREDICTION_MEAN: 0.7,
      keys.LABEL_MEAN: 1.0,
      keys.LOSS_REGULARIZATION: 100,
      keys.PRECISION: 1.0,
      keys.RECALL: 0.7,
      keys.ACCURACY_BASELINE: 1.0,
      keys.AUC: 0.,
      keys.AUC_PR: 1.0
  }

  if context.executing_eagerly():
    eval_metrics = head.metrics(regularization_losses=regularization_losses)
    updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                          labels, regularization_losses)
    self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys())
    self.assertAllClose(
        expected_metrics,
        {k: updated_metrics[k].result() for k in updated_metrics})
    return

  spec = head.create_estimator_spec(
      features=features,
      mode=ModeKeys.EVAL,
      logits=logits,
      labels=labels,
      regularization_losses=regularization_losses)
  with self.cached_session() as sess:
    head_utils._initialize_variables(self, spec.scaffold)
    self.assertIsNone(spec.scaffold.summary_op)
    value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
    update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
    _ = sess.run(update_ops)
    self.assertAllClose(expected_metrics,
                        {k: value_ops[k].eval() for k in value_ops})
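# A standalone NumPy re-derivation of the docstring arithmetic above, keeping
# only the four unmasked steps (weights [2, 5, 1, 2]). Per-step losses are
# approximately |logit| where the prediction disagrees with the label, and ~0
# where it agrees.
import numpy as np

weights = np.array([2., 5., 1., 2.])
losses = np.array([101., 0., 103., 0.])
correct = np.array([0., 1., 0., 1.])
print(np.sum(weights * losses) / np.sum(weights))   # loss: 30.5
print(np.sum(weights * correct) / np.sum(weights))  # accuracy: 0.7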
def __init__(self,
             sequence_feature_columns,
             context_feature_columns=None,
             num_units=None,
             cell_type=USE_DEFAULT,
             rnn_cell_fn=None,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
             input_layer_partitioner=None,
             config=None):
  """Initializes a `RNNClassifier` instance.

  Args:
    sequence_feature_columns: An iterable containing the `FeatureColumn`s
      that represent sequential input. All items in the set should either be
      sequence columns (e.g. `sequence_numeric_column`) or constructed from
      one (e.g. `embedding_column` with `sequence_categorical_column_*` as
      input).
    context_feature_columns: An iterable containing the `FeatureColumn`s for
      contextual input. The data represented by these columns will be
      replicated and given to the RNN at each timestep. These columns must be
      instances of classes derived from `_DenseColumn` such as
      `numeric_column`, not the sequential variants.
    num_units: Iterable of integer number of hidden units per RNN layer. If
      set, `cell_type` must also be specified and `rnn_cell_fn` must be
      `None`.
    cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
      the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
      `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
      must be `None`.
    rnn_cell_fn: A function that takes one argument, a
      `tf.estimator.ModeKeys`, and returns an object of type
      `tf.nn.rnn_cell.RNNCell` that will be used to construct the RNN. If
      set, `num_units` and `cell_type` cannot be set. This is for advanced
      users who need additional customization beyond `num_units` and
      `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is needed for
      stacked RNNs.
    model_dir: Directory in which to save the model parameters, graph, etc.
      This can also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column that
      represents weights. It is used to down weight or boost examples during
      training. It will be multiplied by the loss of the example. If it is a
      string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `_NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is applied
      on it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and have any value in
      `label_vocabulary`. If it is not given, labels must already be encoded
      as an integer or float within [0, 1] for `n_classes=2`, or as integer
      values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
      raised if the vocabulary is not provided and the labels are strings.
    optimizer: An instance of `tf.Optimizer` or a string specifying the
      optimizer type. Defaults to the Adagrad optimizer.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
    input_layer_partitioner: Optional. Partitioner for the input layer.
      Defaults to `min_max_variable_partitioner` with `min_slice_size`
      64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Raises:
    ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
      compatible.
  """
  rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)

  if n_classes == 2:
    head = binary_head_lib.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_head_lib.MultiClassHead(
        n_classes=n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)

  def _model_fn(features, labels, mode, config):
    return _rnn_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        rnn_cell_fn=rnn_cell_fn,
        sequence_feature_columns=tuple(sequence_feature_columns or []),
        context_feature_columns=tuple(context_feature_columns or []),
        return_sequences=False,
        optimizer=optimizer,
        input_layer_partitioner=input_layer_partitioner,
        config=config)

  super(RNNClassifier, self).__init__(
      model_fn=_model_fn, model_dir=model_dir, config=config)
def simple_multi_head(export_path, eval_export_path):
  """Trains and exports a simple multi-headed model."""

  def eval_input_receiver_fn():
    """Eval input receiver function."""
    serialized_tf_example = tf.compat.v1.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')
    language = tf.feature_column.categorical_column_with_vocabulary_list(
        'language', ['english', 'chinese', 'other'])
    age = tf.feature_column.numeric_column('age')
    english_label = tf.feature_column.numeric_column('english_label')
    chinese_label = tf.feature_column.numeric_column('chinese_label')
    other_label = tf.feature_column.numeric_column('other_label')
    all_features = [age, language, english_label, chinese_label, other_label]
    feature_spec = tf.feature_column.make_parse_example_spec(all_features)
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.io.parse_example(
        serialized=serialized_tf_example, features=feature_spec)
    labels = {
        'english_head': features['english_label'],
        'chinese_head': features['chinese_label'],
        'other_head': features['other_label'],
    }
    return export.EvalInputReceiver(
        features=features, receiver_tensors=receiver_tensors, labels=labels)

  def input_fn():
    """Train input function."""
    labels = {
        'english_head': tf.constant([[1], [1], [0], [0], [0], [0]]),
        'chinese_head': tf.constant([[0], [0], [1], [1], [0], [0]]),
        'other_head': tf.constant([[0], [0], [0], [0], [1], [1]])
    }
    features = {
        'age':
            tf.constant([[1], [2], [3], [4], [5], [6]]),
        'language':
            tf.SparseTensor(
                values=[
                    'english', 'english', 'chinese', 'chinese', 'other',
                    'other'
                ],
                indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]],
                dense_shape=[6, 1]),
    }
    return features, labels

  language = tf.feature_column.categorical_column_with_vocabulary_list(
      'language', ['english', 'chinese', 'other'])
  age = tf.feature_column.numeric_column('age')
  all_features = [age, language]
  feature_spec = tf.feature_column.make_parse_example_spec(all_features)

  # TODO(b/130299739): Update with tf.estimator.BinaryClassHead and
  # tf.estimator.MultiHead
  english_head = binary_class_head.BinaryClassHead(name='english_head')
  chinese_head = binary_class_head.BinaryClassHead(name='chinese_head')
  other_head = binary_class_head.BinaryClassHead(name='other_head')
  combined_head = multi_head.MultiHead(
      [english_head, chinese_head, other_head])

  estimator = tf_compat_v1_estimator.DNNLinearCombinedEstimator(
      head=combined_head,
      dnn_feature_columns=[],
      dnn_optimizer=tf.compat.v1.train.AdagradOptimizer(learning_rate=0.01),
      dnn_hidden_units=[],
      linear_feature_columns=[language, age],
      linear_optimizer=tf.compat.v1.train.FtrlOptimizer(learning_rate=0.05))
  estimator.train(input_fn=input_fn, steps=1000)

  return util.export_model_and_eval_model(
      estimator=estimator,
      serving_input_receiver_fn=(
          tf_estimator.export.build_parsing_serving_input_receiver_fn(
              feature_spec)),
      eval_input_receiver_fn=eval_input_receiver_fn,
      export_path=export_path,
      eval_export_path=eval_export_path)
def model_fn(features, labels, mode, config):
  """model_fn for the custom estimator."""
  del config
  input_tensors = tfl.estimators.transform_features(features, feature_columns)
  inputs = {
      key: tf.keras.layers.Input(shape=(1,), name=key)
      for key in input_tensors
  }

  lattice_sizes = [3, 2, 2, 2]
  lattice_monotonicities = ['increasing', 'none', 'increasing', 'increasing']
  lattice_input = tf.keras.layers.Concatenate(axis=1)([
      tfl.layers.PWLCalibration(
          input_keypoints=np.linspace(10, 100, num=8, dtype=np.float32),
          # The output range of the calibrator should be the input range of
          # the following lattice dimension.
          output_min=0.0,
          output_max=lattice_sizes[0] - 1.0,
          monotonicity='increasing',
      )(inputs['age']),
      tfl.layers.CategoricalCalibration(
          # Number of categories including any missing/default category.
          num_buckets=2,
          output_min=0.0,
          output_max=lattice_sizes[1] - 1.0,
      )(inputs['sex']),
      tfl.layers.PWLCalibration(
          input_keypoints=[0.0, 1.0, 2.0, 3.0],
          output_min=0.0,
          output_max=lattice_sizes[2] - 1.0,
          # You can specify TFL regularizers as a tuple
          # ('regularizer name', l1, l2).
          kernel_regularizer=('hessian', 0.0, 1e-4),
          monotonicity='increasing',
      )(inputs['ca']),
      tfl.layers.CategoricalCalibration(
          num_buckets=3,
          output_min=0.0,
          output_max=lattice_sizes[3] - 1.0,
          # Categorical monotonicity can be a partial order.
          # (i, j) indicates that we must have output(i) <= output(j).
          # Make sure to set the lattice monotonicity to 'increasing' for
          # this dimension.
          monotonicities=[(0, 1), (0, 2)],
      )(inputs['thal']),
  ])
  output = tfl.layers.Lattice(
      lattice_sizes=lattice_sizes,
      monotonicities=lattice_monotonicities)(
          lattice_input)

  training = (mode == tf.estimator.ModeKeys.TRAIN)
  model = tf.keras.Model(inputs=inputs, outputs=output)
  logits = model(input_tensors, training=training)

  if training:
    optimizer = optimizers.get_optimizer_instance_v2('Adam',
                                                     FLAGS.learning_rate)
  else:
    optimizer = None

  head = binary_class_head.BinaryClassHead()
  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      optimizer=optimizer,
      logits=logits,
      trainable_variables=model.trainable_variables,
      update_ops=model.updates)
class IterationExportOutputsTest(tu.AdanetTestCase):

  @parameterized.named_parameters(
      {
          "testcase_name": "regression_head",
          "head": regression_head.RegressionHead(),
      }, {
          "testcase_name": "binary_classification_head",
          "head": binary_class_head.BinaryClassHead(),
      })
  @test_util.run_in_graph_and_eager_modes
  def test_head_export_outputs(self, head):
    with context.graph_mode():
      ensemble_builder = _HeadEnsembleBuilder(head)
      builder = _IterationBuilder(
          _FakeCandidateBuilder(),
          _FakeSubnetworkManager(),
          ensemble_builder,
          summary_maker=_ScopedSummary,
          ensemblers=[_FakeEnsembler()],
          max_steps=10)
      features = [[1., -1., 0.]]
      labels = [1]
      mode = tf.estimator.ModeKeys.PREDICT
      subnetwork_builders = [_FakeBuilder("test")]
      iteration = builder.build_iteration(
          base_global_step=0,
          iteration_number=0,
          ensemble_candidates=[
              EnsembleCandidate("test", subnetwork_builders, None)
          ],
          subnetwork_builders=subnetwork_builders,
          features=features,
          labels=labels,
          config=tf.estimator.RunConfig(model_dir=self.test_subdirectory),
          mode=mode)

      # Compare iteration outputs with default head outputs.
      spec = head.create_estimator_spec(
          features=features, labels=labels, mode=mode, logits=[[.5]])
      self.assertEqual(
          len(spec.export_outputs),
          len(iteration.estimator_spec.export_outputs))
      for key in spec.export_outputs:
        if isinstance(spec.export_outputs[key],
                      tf.estimator.export.RegressionOutput):
          self.assertAlmostEqual(
              self.evaluate(spec.export_outputs[key].value),
              self.evaluate(
                  iteration.estimator_spec.export_outputs[key].value))
          continue
        if isinstance(spec.export_outputs[key],
                      tf.estimator.export.ClassificationOutput):
          self.assertAllClose(
              self.evaluate(spec.export_outputs[key].scores),
              self.evaluate(
                  iteration.estimator_spec.export_outputs[key].scores))
          self.assertAllEqual(
              self.evaluate(spec.export_outputs[key].classes),
              self.evaluate(
                  iteration.estimator_spec.export_outputs[key].classes))
          continue
        if isinstance(spec.export_outputs[key],
                      tf.estimator.export.PredictOutput):
          if "classes" in spec.export_outputs[key].outputs:
            # Verify string Tensor outputs separately.
            self.assertAllEqual(
                self.evaluate(spec.export_outputs[key].outputs["classes"]),
                self.evaluate(iteration.estimator_spec.export_outputs[key]
                              .outputs["classes"]))
            del spec.export_outputs[key].outputs["classes"]
            del iteration.estimator_spec.export_outputs[key].outputs[
                "classes"]
          if "all_classes" in spec.export_outputs[key].outputs:
            # Verify string Tensor outputs separately.
            self.assertAllEqual(
                self.evaluate(
                    spec.export_outputs[key].outputs["all_classes"]),
                self.evaluate(iteration.estimator_spec.export_outputs[key]
                              .outputs["all_classes"]))
            del spec.export_outputs[key].outputs["all_classes"]
            del iteration.estimator_spec.export_outputs[key].outputs[
                "all_classes"]
          self.assertAllClose(
              self.evaluate(spec.export_outputs[key].outputs),
              self.evaluate(
                  iteration.estimator_spec.export_outputs[key].outputs))
          continue
        self.fail("Invalid export_output for {}.".format(key))
def __init__(self,
             sequence_feature_columns,
             context_feature_columns=None,
             units=None,
             cell_type=USE_DEFAULT,
             rnn_cell_fn=None,
             return_sequences=False,
             model_dir=None,
             n_classes=2,
             weight_column=None,
             label_vocabulary=None,
             optimizer='Adagrad',
             loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
             sequence_mask='sequence_mask',
             config=None):
  """Initializes a `RNNClassifier` instance.

  Args:
    sequence_feature_columns: An iterable containing the `FeatureColumn`s
      that represent sequential input. All items in the set should either be
      sequence columns (e.g. `sequence_numeric_column`) or constructed from
      one (e.g. `embedding_column` with `sequence_categorical_column_*` as
      input).
    context_feature_columns: An iterable containing the `FeatureColumn`s for
      contextual input. The data represented by these columns will be
      replicated and given to the RNN at each timestep. These columns must be
      instances of classes derived from `DenseColumn` such as
      `numeric_column`, not the sequential variants.
    units: Iterable of integer number of hidden units per RNN layer. If set,
      `cell_type` must also be specified and `rnn_cell_fn` must be `None`.
    cell_type: A class producing an RNN cell or a string specifying the cell
      type. Supported strings are: `'simple_rnn'`, `'lstm'`, and `'gru'`. If
      set, `units` must also be specified and `rnn_cell_fn` must be `None`.
    rnn_cell_fn: A function that returns an RNN cell instance that will be
      used to construct the RNN. If set, `units` and `cell_type` cannot be
      set. This is for advanced users who need additional customization
      beyond `units` and `cell_type`. Note that
      `tf.keras.layers.StackedRNNCells` is needed for stacked RNNs.
    return_sequences: A boolean indicating whether to return the last output
      in the output sequence, or the full sequence. Note that if True,
      `weight_column` must be None or a string.
    model_dir: Directory in which to save the model parameters, graph, etc.
      This can also be used to load checkpoints from the directory into an
      estimator to continue training a previously saved model.
    n_classes: Number of label classes. Defaults to 2, namely binary
      classification. Must be > 1.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column that
      represents weights. It is used to down weight or boost examples during
      training. It will be multiplied by the loss of the example. If it is a
      string, it is used as a key to fetch the weight tensor from the
      `features`. If it is a `NumericColumn`, the raw tensor is fetched by
      key `weight_column.key`, then `weight_column.normalizer_fn` is applied
      on it to get the weight tensor.
    label_vocabulary: A list of strings representing possible label values.
      If given, labels must be of string type and have any value in
      `label_vocabulary`. If it is not given, labels must already be encoded
      as an integer or float within [0, 1] for `n_classes=2`, or as integer
      values in {0, 1, ..., n_classes-1} for `n_classes>2`. An error is
      raised if the vocabulary is not provided and the labels are strings.
    optimizer: An instance of `tf.Optimizer` or a string specifying the
      optimizer type. Defaults to the Adagrad optimizer.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
    sequence_mask: A string with the name of the sequence mask tensor. If
      `sequence_mask` is in the features dictionary, the provided tensor is
      used, otherwise the sequence mask is computed from the length of the
      sequential features. The sequence mask is used in evaluation and
      training mode to aggregate loss and metrics computation while excluding
      padding steps. It is also added to the predictions dictionary in
      prediction mode to indicate which steps are padding.
    config: `RunConfig` object to configure the runtime settings.

  Note that an RNN cell has:
  - a `call` method.
  - a `state_size` attribute.
  - an `output_size` attribute.
  - a `get_initial_state` method.

  See the documentation on `tf.keras.layers.RNN` for more details.

  Raises:
    ValueError: If `units`, `cell_type`, and `rnn_cell_fn` are not
      compatible.
  """
  if n_classes == 2:
    head = binary_head_lib.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_head_lib.MultiClassHead(
        n_classes=n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)

  if return_sequences:
    logging.info('Converting head to sequential head with '
                 '`SequentialHeadWrapper` to allow sequential predictions.')
    head = seq_head_lib.SequentialHeadWrapper(
        head,
        sequence_length_mask=sequence_mask,
        feature_columns=weight_column)

  super(RNNClassifier, self).__init__(
      head=head,
      sequence_feature_columns=sequence_feature_columns,
      context_feature_columns=context_feature_columns,
      units=units,
      cell_type=cell_type,
      rnn_cell_fn=rnn_cell_fn,
      return_sequences=return_sequences,
      model_dir=model_dir,
      optimizer=optimizer,
      config=config)
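# A small sketch of the head selection above for the default arguments:
# binary classification with `return_sequences=True` produces a wrapped
# sequential head whose mask key matches the `sequence_mask` argument.
head = binary_head_lib.BinaryClassHead(
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
head = seq_head_lib.SequentialHeadWrapper(
    head, sequence_length_mask='sequence_mask', feature_columns=None)
assert head.input_sequence_mask_key == 'sequence_mask'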