    def testIsMrsTrueInModel(self):
        l = layers.TaylorScorer(compute_removal_saliency=True,
                                compute_mean_replacement_saliency=True)
        l_before = tf.keras.layers.Dense(20, activation=tf.nn.tanh)
        model = tf.keras.Sequential([
            l_before, l,
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(
                32, activation=lambda x: tf.nn.log_softmax(x, axis=1))
        ])
        # Building the model. Don't need the return value.
        model(tf.random.uniform((3, 5)))
        l_before.weights[0].assign(
            tf.concat([l_before.weights[0][:, 1:],
                       tf.zeros((5, 1))], axis=1))

        x = tf.Variable(tf.random.uniform((3, 5)))
        a_mean = tf.reduce_mean(l_before(x), axis=0)
        with tf.GradientTape() as tape:
            y = model(x)
            loss = tf.reduce_sum(y)
        # The gradient itself is not needed; the backward pass accumulates
        # the mrs score.
        tape.gradient(loss, x)

        self.assertAllEqual(a_mean, l.get_saved_values('mean'))
        # The last unit's incoming weights were zeroed above, so its output
        # is constant (just its bias) and its mrs should be zero.
        self.assertEqual(l.get_saved_values('mrs')[-1].numpy(), 0.0)
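
    # A minimal reference for the mean-replacement saliency (mrs) asserted
    # above -- an assumption based on these assertions, not the layer's
    # actual implementation: a first-order Taylor estimate of the loss
    # change when a unit's activations are replaced by their mean,
    # |sum_b (mean(a) - a_b) * dL/da_b| / batch_size.
    def _reference_mrs(self, activations, grads):
        # Per-unit mean over the batch dimension.
        mean = tf.reduce_mean(activations, axis=0)
        # First-order change in the loss under mean replacement, per unit.
        change = tf.reduce_sum((mean - activations) * grads, axis=0)
        return tf.abs(change) / activations.shape[0]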
    def testIdentity(self):
        l = layers.TaylorScorer(compute_removal_saliency=False,
                                compute_mean_replacement_saliency=False)
        a = tf.random.uniform((3, 5))
        self.assertAllEqual(l(a), a)
        a = tf.random.uniform((3, 5, 5, 2))
        self.assertAllEqual(l(a), a)
    def testIsRsTrue(self):
        values = [tf.random.uniform((3, 5)), tf.random.uniform((3, 5, 5, 4))]
        for inp in values:
            n_dim = len(inp.shape)
            l = layers.TaylorScorer(compute_removal_saliency=True,
                                    compute_mean_replacement_saliency=False)
            zeros_channel = tf.zeros(inp.shape.as_list()[:-1] + [1])
            inp_concat = tf.concat((inp, zeros_channel), axis=n_dim - 1)
            x = tf.Variable(inp_concat)
            x_mean = tf.reduce_mean(x, axis=list(range(n_dim - 1)))
            with tf.GradientTape() as tape:
                y = l(x)
                loss = tf.reduce_sum(y)
            # After the forward pass rs should still be None; it is only
            # computed during the backward pass.
            self.assertIsNone(l.get_saved_values('rs'))
            dx = tape.gradient(loss, x)
            # mrs stays None because compute_mean_replacement_saliency is
            # False.
            self.assertIsNone(l.get_saved_values('mrs'))
            # dy is just 1's.
            self.assertAllEqual(dx, tf.ones_like(inp_concat))
            # Zeroing a unit changes its activation by -x; with dy all ones
            # the first-order loss change is -x * dy.
            avg_change = -x
            if n_dim > 2:
                avg_change = tf.reduce_sum(avg_change,
                                           axis=list(range(1, n_dim - 1)))
            # Sum the absolute changes over the batch and normalize by the
            # number of elements per unit.
            correct_rs = tf.reduce_sum(tf.abs(avg_change), axis=0) / int(
                tf.size(x[Ellipsis, 0]))
            self.assertAllClose(correct_rs, l.get_saved_values('rs'))
            # The last channel is all zeros, so removing it has zero penalty.
            self.assertEqual(l.get_saved_values('rs')[-1].numpy(), 0.0)
            # We still expect the mean to be calculated.
            self.assertAllEqual(x_mean, l.get_saved_values('mean'))
            self.assertAllEqual(l(inp), inp)
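
    # A minimal reference for the removal saliency (rs) asserted above --
    # a sketch mirroring the test arithmetic, not the layer's own code:
    # the first-order Taylor estimate of the loss change when a unit is
    # zeroed out, reduced over spatial dimensions, with the absolute value
    # averaged over the remaining elements.
    def _reference_rs(self, x, dy):
        n_dim = len(x.shape)
        change = -x * dy
        if n_dim > 2:
            # Convolutional activations: sum over the spatial dimensions.
            change = tf.reduce_sum(change, axis=list(range(1, n_dim - 1)))
        return tf.reduce_sum(tf.abs(change), axis=0) / int(
            tf.size(x[Ellipsis, 0]))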
    def testIsAbsTrue(self):
        l = layers.TaylorScorer(is_abs=False, compute_removal_saliency=True)
        a = tf.constant([[-1, 0, 1], [1, 0, 1]], dtype=tf.float32)
        x = tf.Variable(a)
        x_mean = tf.reduce_mean(x, axis=0)
        with tf.GradientTape() as tape:
            y = l(x)
            loss = tf.reduce_sum(y)
        # Before the backward pass it is None.
        self.assertIsNone(l.get_saved_values('rs'))
        dx = tape.gradient(loss, x)
        # dy is all ones, so without abs the per-unit saliency is the mean
        # over the batch of -x * dy; e.g. column 2: -(1 + 1) / 2 = -1.
        correct_rs = tf.constant([0, 0, -1])
        self.assertAllEqual(dx, tf.ones_like(a))
        self.assertAllEqual(correct_rs, l.get_saved_values('rs'))
        # We still expect the mean to be calculated.
        self.assertAllEqual(x_mean, l.get_saved_values('mean'))

        # Let's do the same with is_abs=True and get non-zero rs.
        with tf.GradientTape() as tape:
            y = l(x, is_abs=True)
            loss = tf.reduce_sum(y)
        # The fresh forward pass resets rs to None.
        self.assertIsNone(l.get_saved_values('rs'))
        tape.gradient(loss, x)
        # With abs, the per-unit saliency is the mean over the batch of
        # |x * dy| = [1, 0, 1].
        correct_rs = tf.constant([1, 0, 1])
        self.assertAllEqual(correct_rs, l.get_saved_values('rs'))
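
    # Minimal reference for the is_abs switch as the assertions above imply
    # it (an assumption, not the layer's implementation): applying abs per
    # example before the batch average keeps positive and negative
    # contributions from cancelling.
    def _reference_rs_2d(self, x, dy, is_abs=True):
        change = -x * dy
        if is_abs:
            change = tf.abs(change)
        return tf.reduce_mean(change, axis=0)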
    def testAggregationRS(self):
        l = layers.TaylorScorer(compute_removal_saliency=False,
                                compute_mean_replacement_saliency=False)
        x1 = tf.Variable(tf.random.uniform((3, 5)))
        with tf.GradientTape() as tape:
            y = l(x1, compute_removal_saliency=True)
            loss = tf.reduce_sum(y)
        # The backward pass populates rs.
        tape.gradient(loss, x1)
        first_rs = l.get_saved_values('rs')
        # A plain forward pass should discard the previous rs, mrs and mean
        # values.
        y = l(x1)
        self.assertIsNone(l.get_saved_values('rs'))
        # Another input.
        x2 = tf.Variable(tf.random.uniform((3, 5)))
        with tf.GradientTape() as tape:
            y = l(x2, compute_removal_saliency=True)
            loss = tf.reduce_sum(y)
        # The backward pass populates rs.
        tape.gradient(loss, x2)
        second_rs = l.get_saved_values('rs')
        # Aggregating once: rs becomes the average of the two runs.
        with tf.GradientTape() as tape:
            y = l(x1, compute_removal_saliency=True, aggregate_values=True)
            loss = tf.reduce_sum(y)
        tape.gradient(loss, x1)
        self.assertAllClose((first_rs + second_rs) / 2,
                            l.get_saved_values('rs'))
        # Aggregating twice.
        with tf.GradientTape() as tape:
            y = l(x1, compute_removal_saliency=True, aggregate_values=True)
            loss = tf.reduce_sum(y)
        tape.gradient(loss, x1)
        self.assertAllClose((first_rs + first_rs + second_rs) / 3,
                            l.get_saved_values('rs'))
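
    # A minimal sketch of the running average that aggregate_values appears
    # to maintain (an assumption based on testAggregationRS and
    # testAggregationMean): stored values are weighted by the number of
    # samples seen so far.
    def _reference_aggregate(self, old_value, old_count, new_value,
                             new_count):
        total = old_count + new_count
        return (old_value * old_count + new_value * new_count) / total, total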
    def testArgs(self):
        l = layers.TaylorScorer(name='test',
                                compute_removal_saliency=False,
                                compute_mean_replacement_saliency=True)
        self.assertEqual(l.name, 'test')
        self.assertFalse(l.compute_removal_saliency)
        self.assertTrue(l.compute_mean_replacement_saliency)
        self.assertTrue(l.is_abs)
        self.assertFalse(l.save_l2norm)
    def testGetConfig(self):
        l = layers.TaylorScorer()
        expected_config = {
            'is_abs': True,
            'compute_removal_saliency': False,
            'compute_mean_replacement_saliency': False,
            'save_l2norm': False,
            'trainable': False
        }
        self.assertDictContainsSubset(expected_config, l.get_config())
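        # Since the layer exposes its arguments through get_config(), it
        # should round-trip through the standard Keras path (a sketch,
        # untested here):
        #   l2 = layers.TaylorScorer.from_config(l.get_config())
        #   assert l2.get_config() == l.get_config()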
    def testGetMeanValuesAggregated(self):
        l = layers.TaylorScorer(compute_removal_saliency=False,
                                compute_mean_replacement_saliency=False)
        x1 = tf.random.uniform((3, 5))
        l(x1)
        x2 = tf.random.uniform((6, 5))
        l(x2, aggregate_values=True)

        correct_mean = tf.reduce_mean(tf.concat([x1, x2], 0), axis=0)
        self.assertAllClose(l.get_saved_values('mean'), correct_mean)
    def testAggregationMean(self):
        l = layers.TaylorScorer(compute_removal_saliency=False,
                                compute_mean_replacement_saliency=False)
        x1 = tf.random.uniform((3, 5))
        l(x1)
        first_mean = l.get_saved_values('mean')
        # _mean holds two entries -- presumably the running value and the
        # sample count used for aggregation.
        self.assertEqual(len(l._mean), 2)
        x2 = tf.random.uniform((6, 5))
        # Calling without aggregate_values discards the previous values.
        l(x2)
        second_mean = l.get_saved_values('mean')
        self.assertEqual(len(l._mean), 2)
        l(x1, aggregate_values=True)
        # x1 contributes 3 samples on top of x2's 6, so the count-weighted
        # aggregate is (3 * first_mean + 6 * second_mean) / 9.
        self.assertAllClose((first_mean + second_mean * 2) / 3,
                            l.get_saved_values('mean'))
    def testL2Norm(self):
        l = layers.TaylorScorer()
        x1 = tf.random.uniform((3, 5))
        l(x1)
        self.assertIsNone(l.get_saved_values('l2norm'))
        self.assertIsNone(l._l2norm)
        l(x1, save_l2norm=True)
        # The saved value is the squared L2 norm of each unit's activations,
        # normalized by the batch size.
        correct_l2normsquared = tf.square(tf.norm(x1, axis=0)) / x1.shape[0]
        self.assertAllClose(l.get_saved_values('l2norm'),
                            correct_l2normsquared)
        x2 = tf.random.uniform((3, 5))
        l(x2, save_l2norm=True, aggregate_values=True)
        correct_l2normsquared2 = tf.square(tf.norm(x2, axis=0)) / x2.shape[0]
        # Equal batch sizes, so aggregation is a plain average.
        self.assertAllClose(l.get_saved_values('l2norm'),
                            (correct_l2normsquared + correct_l2normsquared2) /
                            2)
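
    # Equivalent form of the expected l2norm above (a sketch): the squared
    # norm over the batch divided by the batch size is just the per-unit
    # mean of squared activations.
    def _reference_l2norm(self, x):
        return tf.reduce_mean(tf.square(x), axis=0)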
    def testGetMeanValues(self):
        l = layers.TaylorScorer(compute_removal_saliency=False,
                                compute_mean_replacement_saliency=False)
        x = tf.random.uniform((3, 5))
        l(x)
        x_mean = tf.reduce_mean(x, axis=0)
        self.assertAllEqual(x_mean, l.get_saved_values('mean'))
        self.assertAllEqual(
            tf.broadcast_to(x_mean, x.shape),
            l.get_saved_values('mean', broadcast_to_input_shape=True))
        rand_mask = tf.cast(
            tf.random.uniform(x_mean.shape[:1], dtype=tf.int32, maxval=2),
            tf.float32)
        self.assertAllEqual(rand_mask * x_mean,
                            l.get_saved_values('mean', unit_mask=rand_mask))
        self.assertAllEqual(
            tf.broadcast_to(rand_mask * x_mean, x.shape),
            l.get_saved_values('mean',
                               unit_mask=rand_mask,
                               broadcast_to_input_shape=True))
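
    # A minimal reference for the mean retrieval options asserted above
    # (mirroring the test, not the layer's code): unit_mask multiplies the
    # per-unit means, and broadcast_to_input_shape tiles the result back to
    # the activation shape.
    def _reference_masked_mean(self, x, unit_mask=None, broadcast=False):
        mean = tf.reduce_mean(x, axis=0)
        if unit_mask is not None:
            # Zero out the means of masked-out units.
            mean = unit_mask * mean
        if broadcast:
            mean = tf.broadcast_to(mean, x.shape)
        return mean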