def test_gradient_error(self):
    """Compare custom gradient with tf.gradient.

    Computes the gradient of the internal (plain-autodiff) loss and of the
    public loss (which defines a custom gradient) w.r.t. the same variable,
    and checks both the loss values and the gradients agree.
    """
    labels = tf.constant([[0.4, 0.3, 0.3], [0.8, 0.1, 0.1], [0.0, 0.0, 1.0],
                          [0.0, 1.0, 0.0]])
    activations = tf.Variable(tf.random.normal(shape=[4, 3]))
    # NOTE: the internal helper takes (activations, labels, ...) while the
    # public API takes (labels, activations, ...) — the argument orders differ.
    with tf.GradientTape() as tape1:
        internal_loss = bitemp._internal_bi_tempered_logistic_loss(
            activations, labels, 0.5, 1.5)
    # Reference gradient from plain autodiff through the internal loss.
    numerical_gradient = tape1.gradient(internal_loss, activations)
    with tf.GradientTape() as tape2:
        actual_loss = bitemp.bi_tempered_logistic_loss(
            labels, activations, 0.5, 1.5)
    # Gradient through the public loss (custom-gradient path).
    actual_gradient = tape2.gradient(actual_loss, activations)
    # Initializer must run before evaluating tensors in TF1-compat graph mode.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    internal_loss_out, actual_loss_out = self.evaluate(
        [internal_loss, actual_loss])
    # Only the first row of each gradient is compared below; the full shape
    # is still asserted against the (4, 3) variable shape.
    numerical_gradient_out, actual_gradient_out = self.evaluate(
        [numerical_gradient[0], actual_gradient[0]])
    self.assertEqual(actual_gradient.shape, (4, 3))
    self.assertAllClose(actual_loss_out, internal_loss_out)
    self.assertAllClose(actual_gradient_out, numerical_gradient_out, atol=1e-5)
def test_constant_shift(self):
    """Adding a per-row constant to all activations must not change the loss."""
    labels = tf.constant([[0.2, 0.3, 0.5], [0.4, 0.4, 0.2], [0.7, 0.2, 0.1]])
    activations = tf.random.normal(shape=[3, 3])
    # One bias scalar per row, broadcast across the class dimension.
    bias = tf.random.normal(shape=[3, 1])
    for t2 in [0.8, 1.2]:
        base_loss = bitemp.bi_tempered_logistic_loss(
            labels, activations, 0.5, t2)
        shifted_loss = bitemp.bi_tempered_logistic_loss(
            labels, activations + bias, 0.5, t2)
        # One loss value per example.
        self.assertEqual(base_loss.shape, [3])
        base_out, shifted_out = self.evaluate([base_loss, shifted_loss])
        self.assertAllClose(base_out, shifted_out)
def test_loss_value(self):
    """Check the loss against precomputed reference values."""
    labels = tf.constant([[0.2, 0.3, 0.5], [0.6, 0.3, 0.1], [0.2, 0.8, 0.0]])
    activations = [[-0.5, 0.1, 2.0], [0.1, 1.5, -5.0], [4.0, -3.0, -6.0]]
    # Temperatures t1=0.5, t2=1.5.
    loss = bitemp.bi_tempered_logistic_loss(labels, activations, 0.5, 1.5)
    self.assertAllClose(
        self.evaluate(loss), [0.02301914, 0.18972909, 0.93874922])
    # Temperatures t1=0.5, t2=0.8, with an explicit iteration count.
    loss = bitemp.bi_tempered_logistic_loss(
        labels, activations, 0.5, 0.8, num_iters=20)
    self.assertAllClose(
        self.evaluate(loss), [0.21646356, 0.41836615, 1.33997854])
def test_limit_case_logistic_loss(self):
    """t1 = t2 = 1.0 should reduce to ordinary softmax cross-entropy."""
    labels = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
    activations = tf.random.normal(shape=[3, 3])
    bi_tempered = bitemp.bi_tempered_logistic_loss(
        labels, activations, 1.0, 1.0)
    # Reference: the standard logistic (softmax cross-entropy) loss.
    reference = tf.nn.softmax_cross_entropy_with_logits(
        logits=activations, labels=labels)
    bi_tempered_out, reference_out = self.evaluate([bi_tempered, reference])
    self.assertAllClose(bi_tempered_out, reference_out)
def test_sparse_loss(self):
    """Sparse int labels (rank 1 and rank 2) must match the one-hot dense path."""
    cases = [
        # Rank-1 labels with a [4, 3] activation matrix.
        (tf.constant([0, 2, 1, 0]),
         [[-0.5, 0.1, 2.0], [0.1, 1.5, -5.0], [4.0, -3.0, -6.0],
          [-1.5, 0.7, 5.2]]),
        # Rank-2 labels with a [2, 2, 3] activation tensor.
        (tf.constant([[0, 2], [1, 0]]),
         [[[-0.5, 0.1, 2.0], [0.1, 1.5, -5.0]],
          [[4.0, -3.0, -6.0], [-1.5, 0.7, 5.2]]]),
    ]
    for labels, activations in cases:
        dense_loss = bitemp.bi_tempered_logistic_loss(
            tf.one_hot(labels, 3), activations, 0.5, 1.5)
        sparse_loss = bitemp.sparse_bi_tempered_logistic_loss(
            labels, activations, 0.5, 1.5)
        dense_out = self.evaluate(dense_loss)
        sparse_out = self.evaluate(sparse_loss)
        self.assertAllClose(dense_out, sparse_out)
def test_label_smoothing(self):
    """Check the loss with label smoothing against precomputed values."""
    labels = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
    activations = [[-0.5, 0.1, 2.0], [0.1, 1.5, -5.0], [4.0, -3.0, -6.0]]
    smoothed_loss = bitemp.bi_tempered_logistic_loss(
        labels, activations, 0.5, 1.5, label_smoothing=0.1)
    self.assertAllClose(
        self.evaluate(smoothed_loss),
        [0.76652711, 0.08627685, 1.35443510],
        atol=1e-5)
def test_dynamic_temperatures(self):
    """Recomputing the loss with different (t1, t2) pairs gives expected values."""
    labels = tf.constant([[0.2, 0.5, 0.3]])
    activations = [[-0.5, 0.1, 2.0]]
    temperature_pairs = [(1.0, 1.0), (0.9, 1.1), (0.8, 1.2), (0.7, 1.3)]
    expected_losses = [[0.62870466], [0.45677936], [0.34298314], [0.26295574]]
    observed_losses = [
        self.evaluate(
            bitemp.bi_tempered_logistic_loss(
                labels, activations, t1, t2, num_iters=5))
        for t1, t2 in temperature_pairs
    ]
    self.assertAllClose(expected_losses, observed_losses, atol=1e-5)