def test_train_with_regularization_losses(self):
  """Checks TRAIN-mode loss, train op, predictions and summaries.

  Two multi-label heads with head weights [1., 2.] are combined, and a list
  of regularization losses is passed in. The expected training loss is the
  weighted sum of the per-head losses plus the summed regularization losses.
  """
  multi_head = multi_head_lib.MultiHead(
      [
          multi_label_head.MultiLabelHead(n_classes=2, name='head1'),
          multi_label_head.MultiLabelHead(n_classes=3, name='head2'),
      ],
      head_weights=[1., 2.])

  logits = {
      'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
      'head2': np.array(
          [[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32),
  }
  expected_probabilities = {
      'head1': nn.sigmoid(logits['head1']),
      'head2': nn.sigmoid(logits['head2']),
  }
  labels = {
      'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
      'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
  }
  features = {'x': np.array([[42]], dtype=np.int32)}
  regularization_losses = [1.5, 0.5]

  # With logits this large, sigmoid cross entropy is approximately:
  #   loss = labels * (logits < 0) * (-logits) +
  #          (1 - labels) * (logits > 0) * logits
  # head1: unweighted loss = [[10., 10.], [15., 0.]]
  #   loss1 = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
  # head2: unweighted loss = [[20., 20., 20.], [30., 0., 0.]]
  #   loss2 = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15
  # Losses are averaged over classes, then summed over the batch and heads
  # using head weights [1., 2.]:
  #   merged_training_loss = 1. * loss1 + 2. * loss2
  #   training_loss = merged_training_loss + sum([1.5, 0.5])
  expected_loss_head1 = 8.75
  expected_loss_head2 = 15.0
  expected_regularization_loss = 2.
  expected_loss = (
      1. * expected_loss_head1
      + 2. * expected_loss_head2
      + expected_regularization_loss)
  tolerance = 1e-3

  loss_tensor = multi_head.loss(
      logits=logits,
      labels=labels,
      features=features,
      mode=model_fn.ModeKeys.TRAIN,
      regularization_losses=regularization_losses)
  self.assertAllClose(
      expected_loss,
      self.evaluate(loss_tensor),
      rtol=tolerance,
      atol=tolerance)
  if context.executing_eagerly():
    # The estimator-spec checks below are only run in graph mode.
    return

  metric_names = metric_keys.MetricKeys
  pred_keys = prediction_keys.PredictionKeys
  train_op_prefix = 'my_train_op'

  def _make_train_op(loss):
    # Encode the loss in the op's string output so it can be asserted on.
    formatted_loss = string_ops.as_string(loss, precision=3)
    return string_ops.string_join(
        [constant_op.constant(train_op_prefix), formatted_loss])

  estimator_spec = multi_head.create_estimator_spec(
      features=features,
      mode=model_fn.ModeKeys.TRAIN,
      logits=logits,
      labels=labels,
      train_op_fn=_make_train_op,
      regularization_losses=regularization_losses)

  self.assertIsNotNone(estimator_spec.loss)
  self.assertEqual({}, estimator_spec.eval_metric_ops)
  self.assertIsNotNone(estimator_spec.train_op)
  self.assertIsNone(estimator_spec.export_outputs)
  test_lib._assert_no_hooks(self, estimator_spec)

  # Assert predictions, loss, train_op, and summaries.
  with self.cached_session() as sess:
    test_lib._initialize_variables(self, estimator_spec.scaffold)
    self.assertIsNotNone(estimator_spec.scaffold.summary_op)
    loss_value, train_result, summary_str, predictions = sess.run(
        (estimator_spec.loss, estimator_spec.train_op,
         estimator_spec.scaffold.summary_op, estimator_spec.predictions))

    for head_name in ('head1', 'head2'):
      self.assertAllClose(
          logits[head_name],
          predictions[(head_name, pred_keys.LOGITS)])
      self.assertAllClose(
          expected_probabilities[head_name],
          predictions[(head_name, pred_keys.PROBABILITIES)])

    self.assertAllClose(
        expected_loss, loss_value, rtol=tolerance, atol=tolerance)
    expected_train_output = six.b(
        '{0:s}{1:.3f}'.format(train_op_prefix, expected_loss))
    self.assertEqual(expected_train_output, train_result)

    expected_summaries = {
        metric_names.LOSS_REGULARIZATION: expected_regularization_loss,
        metric_names.LOSS: expected_loss,
        metric_names.LOSS + '/head1': expected_loss_head1,
        metric_names.LOSS + '/head2': expected_loss_head2,
    }
    test_lib._assert_simple_summaries(
        self, expected_summaries, summary_str, tolerance)
def test_train_one_head(self):
  """Checks TRAIN-mode loss, train op, predictions and summaries.

  A MultiHead wrapping a single multi-label head is used, with no head
  weights and no regularization losses.
  """
  multi_head = multi_head_lib.MultiHead(
      [multi_label_head.MultiLabelHead(n_classes=2, name='head1')])

  logits = {
      'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
  }
  expected_probabilities = {'head1': nn.sigmoid(logits['head1'])}
  labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
  features = {'x': np.array([[42]], dtype=np.int32)}

  # With logits this large, sigmoid cross entropy is approximately:
  #   loss = labels * (logits < 0) * (-logits) +
  #          (1 - labels) * (logits > 0) * logits
  # expected unweighted loss = [[10., 10.], [15., 0.]]
  #   loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
  expected_loss = 8.75
  tolerance = 1e-3

  loss_tensor = multi_head.loss(
      logits=logits,
      labels=labels,
      features=features,
      mode=model_fn.ModeKeys.TRAIN)
  self.assertAllClose(
      expected_loss,
      self.evaluate(loss_tensor),
      rtol=tolerance,
      atol=tolerance)
  if context.executing_eagerly():
    # The estimator-spec checks below are only run in graph mode.
    return

  pred_keys = prediction_keys.PredictionKeys
  train_op_prefix = 'my_train_op'

  def _make_train_op(loss):
    # Encode the loss in the op's string output so it can be asserted on.
    formatted_loss = string_ops.as_string(loss, precision=3)
    return string_ops.string_join(
        [constant_op.constant(train_op_prefix), formatted_loss])

  estimator_spec = multi_head.create_estimator_spec(
      features=features,
      mode=model_fn.ModeKeys.TRAIN,
      logits=logits,
      labels=labels,
      train_op_fn=_make_train_op)

  self.assertIsNotNone(estimator_spec.loss)
  self.assertEqual({}, estimator_spec.eval_metric_ops)
  self.assertIsNotNone(estimator_spec.train_op)
  self.assertIsNone(estimator_spec.export_outputs)
  test_lib._assert_no_hooks(self, estimator_spec)

  # Assert predictions, loss, train_op, and summaries.
  with self.cached_session() as sess:
    test_lib._initialize_variables(self, estimator_spec.scaffold)
    self.assertIsNotNone(estimator_spec.scaffold.summary_op)
    loss_value, train_result, summary_str, predictions = sess.run(
        (estimator_spec.loss, estimator_spec.train_op,
         estimator_spec.scaffold.summary_op, estimator_spec.predictions))

    self.assertAllClose(
        logits['head1'],
        predictions[('head1', pred_keys.LOGITS)])
    self.assertAllClose(
        expected_probabilities['head1'],
        predictions[('head1', pred_keys.PROBABILITIES)])

    self.assertAllClose(
        expected_loss, loss_value, rtol=tolerance, atol=tolerance)
    expected_train_output = six.b(
        '{0:s}{1:.3f}'.format(train_op_prefix, expected_loss))
    self.assertEqual(expected_train_output, train_result)

    expected_summaries = {
        metric_keys.MetricKeys.LOSS: expected_loss,
        metric_keys.MetricKeys.LOSS + '/head1': expected_loss,
    }
    test_lib._assert_simple_summaries(
        self, expected_summaries, summary_str, tolerance)