Example 1
    def apply(self, Xs, Ys, reversed_Ys, reverse_state: Dict):
        # Apply relus conditioned on backpropagated values.
        reversed_Ys = kutils.apply(self._activation, reversed_Ys)

        # Apply gradient of forward pass without relus.
        Ys_wo_relu = kutils.apply(self._layer_wo_relu, Xs)
        return ilayers.GradientWRT(len(Xs))(Xs + Ys_wo_relu + reversed_Ys)
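
For reference, this DeconvNet-style mapping applies a ReLU to the backpropagated signal and then takes the gradient of the layer copy without its ReLU, so the forward ReLU mask is never applied. For the special case of a standalone ReLU activation layer the linear part is the identity, and the rule reduces to the following NumPy sketch (names are illustrative, not from the library):

    import numpy as np

    def deconvnet_relu_backward(x, grad_out):
        # Only the sign of the backpropagated signal gates the flow;
        # the forward ReLU mask (x > 0) is deliberately not applied.
        return np.where(grad_out > 0, grad_out, 0.0)
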
Example 2
    def apply(self, Xs, Ys, Rs, reverse_state):
        grad = ilayers.GradientWRT(len(Xs))
        to_low = keras.layers.Lambda(lambda x: x * 0 + self._low)
        to_high = keras.layers.Lambda(lambda x: x * 0 + self._high)

        low = [to_low(x) for x in Xs]
        high = [to_high(x) for x in Xs]

        # Get values for the division.
        A = kutils.apply(self._layer_wo_act, Xs)
        B = kutils.apply(self._layer_wo_act_positive, low)
        C = kutils.apply(self._layer_wo_act_negative, high)
        Zs = [
            keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
            for a, b, c in zip(A, B, C)
        ]

        # Divide the relevances by the value.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Distribute along the gradient.
        tmpA = iutils.to_list(grad(Xs + A + tmp))
        tmpB = iutils.to_list(grad(low + B + tmp))
        tmpC = iutils.to_list(grad(high + C + tmp))

        tmpA = [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmpA)]
        tmpB = [keras.layers.Multiply()([a, b]) for a, b in zip(low, tmpB)]
        tmpC = [keras.layers.Multiply()([a, b]) for a, b in zip(high, tmpC)]

        tmp = [
            keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
            for a, b, c in zip(tmpA, tmpB, tmpC)
        ]

        return tmp
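
For orientation, this graph implements the bounded-input (Z^B) LRP rule: the denominator combines the actual pre-activations with the contributions of the lower bound through the positive weights and of the upper bound through the negative weights. A rough NumPy sketch for a single dense layer, ignoring the bias and using illustrative names:

    import numpy as np

    def lrp_zb_dense(x, W, R_out, low, high, eps=1e-12):
        Wp, Wn = np.maximum(W, 0), np.minimum(W, 0)
        l, h = np.full_like(x, low), np.full_like(x, high)
        # Denominator Zs = A - (B + C), as in the graph above.
        z = x @ W - (l @ Wp + h @ Wn)
        s = R_out / np.where(z == 0, eps, z)            # SafeDivide
        # Gradient steps, input re-weighting, then A - (B + C) again.
        return x * (W @ s) - (l * (Wp @ s) + h * (Wn @ s))
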
Example 3
def GuidedBackpropReverseReLULayer(Xs, Ys, reversed_Ys, reverse_state: Dict):
    activation = keras.layers.Activation("relu")
    # Apply relus conditioned on backpropagated values.
    reversed_Ys = kutils.apply(activation, reversed_Ys)

    # Apply gradient of forward pass.
    return ilayers.GradientWRT(len(Xs))(Xs + Ys + reversed_Ys)
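
In contrast to the DeconvNet mapping in Example 1, the gradient here is taken through the forward pass including the ReLU, so the signal is gated by both the forward activation and the sign of the incoming backpropagated values. A minimal NumPy sketch for a standalone ReLU layer (illustrative names):

    import numpy as np

    def guided_backprop_relu(x, grad_out):
        # Pass the signal only where the forward input was positive
        # AND the backpropagated signal is positive.
        return grad_out * (grad_out > 0) * (x > 0)
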
Example 4
    def apply(self, Xs, Ys, Rs, reverse_state):
        # This method is correct, but wasteful.
        grad = ilayers.GradientWRT(len(Xs))
        times_alpha0 = tensorflow.keras.layers.Lambda(
            lambda x: x * self._alpha[0])
        times_alpha1 = tensorflow.keras.layers.Lambda(
            lambda x: x * self._alpha[1])
        times_beta0 = tensorflow.keras.layers.Lambda(
            lambda x: x * self._beta[0])
        times_beta1 = tensorflow.keras.layers.Lambda(
            lambda x: x * self._beta[1])
        keep_positives = tensorflow.keras.layers.Lambda(
            lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
        keep_negatives = tensorflow.keras.layers.Lambda(
            lambda x: x * K.cast(K.less(x, 0), K.floatx()))

        def f(layer, X):
            Zs = kutils.apply(layer, X)
            # Divide incoming relevance by the activations.
            tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
            # Propagate the relevance to the input neurons
            # using the gradient
            tmp = iutils.to_list(grad(X + Zs + tmp))
            # Re-weight relevance with the input values.
            tmp = [
                tensorflow.keras.layers.Multiply()([a, b])
                for a, b in zip(X, tmp)
            ]
            return tmp

        # Distinguish positive and negative inputs.
        Xs_pos = kutils.apply(keep_positives, Xs)
        Xs_neg = kutils.apply(keep_negatives, Xs)

        # xpos*wpos
        r_pp = f(self._layer_wo_act_positive, Xs_pos)
        # xneg*wneg
        r_nn = f(self._layer_wo_act_negative, Xs_neg)
        # a0 * r_pp + a1 * r_nn
        r_pos = [
            tensorflow.keras.layers.Add()([times_alpha0(pp),
                                           times_alpha1(nn)])
            for pp, nn in zip(r_pp, r_nn)
        ]

        # xpos*wneg
        r_pn = f(self._layer_wo_act_negative, Xs_pos)
        # xneg*wpos
        r_np = f(self._layer_wo_act_positive, Xs_neg)
        # b0 * r_pn + b1 * r_np
        r_neg = [
            tensorflow.keras.layers.Add()([times_beta0(pn),
                                           times_beta1(np)])
            for pn, np in zip(r_pn, r_np)
        ]

        return [
            tensorflow.keras.layers.Subtract()([a, b])
            for a, b in zip(r_pos, r_neg)
        ]
Example 5
    def apply(self, Xs, Ys, Rs, reverse_state):
        # This method is correct, but wasteful.
        grad = ilayers.GradientWRT(len(Xs))
        times_alpha = tensorflow.keras.layers.Lambda(lambda x: x * self._alpha)
        times_beta = tensorflow.keras.layers.Lambda(lambda x: x * self._beta)
        keep_positives = tensorflow.keras.layers.Lambda(
            lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
        keep_negatives = tensorflow.keras.layers.Lambda(
            lambda x: x * K.cast(K.less(x, 0), K.floatx()))

        def f(layer1, layer2, X1, X2):
            # Get activations of full positive or negative part.
            Z1 = kutils.apply(layer1, X1)
            Z2 = kutils.apply(layer2, X2)
            Zs = [
                tensorflow.keras.layers.Add()([a, b]) for a, b in zip(Z1, Z2)
            ]
            # Divide incoming relevance by the activations.
            tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
            # Propagate the relevance to the input neurons
            # using the gradient
            tmp1 = iutils.to_list(grad(X1 + Z1 + tmp))
            tmp2 = iutils.to_list(grad(X2 + Z2 + tmp))
            # Re-weight relevance with the input values.
            tmp1 = [
                tensorflow.keras.layers.Multiply()([a, b])
                for a, b in zip(X1, tmp1)
            ]
            tmp2 = [
                tensorflow.keras.layers.Multiply()([a, b])
                for a, b in zip(X2, tmp2)
            ]
            # Combine and return.
            return [
                tensorflow.keras.layers.Add()([a, b])
                for a, b in zip(tmp1, tmp2)
            ]

        # Distinguish positive and negative inputs.
        Xs_pos = kutils.apply(keep_positives, Xs)
        Xs_neg = kutils.apply(keep_negatives, Xs)
        # xpos*wpos + xneg*wneg
        activator_relevances = f(self._layer_wo_act_positive,
                                 self._layer_wo_act_negative, Xs_pos, Xs_neg)

        if self._beta:  # Only compute beta-weighted contributions if beta is not zero.
            # xpos*wneg + xneg*wpos
            inhibitor_relevances = f(self._layer_wo_act_negative,
                                     self._layer_wo_act_positive, Xs_pos,
                                     Xs_neg)
            return [
                tensorflow.keras.layers.Subtract()(
                    [times_alpha(a), times_beta(b)])
                for a, b in zip(activator_relevances, inhibitor_relevances)
            ]
        else:
            return activator_relevances
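
As a cross-check, the alpha-beta LRP rule built by this graph can be written for a single dense layer roughly as below, with `alpha` and `beta` standing in for `self._alpha` and `self._beta` and the bias ignored (a sketch with illustrative names, not library code):

    import numpy as np

    def lrp_alpha_beta_dense(x, W, R_out, alpha, beta, eps=1e-12):
        xp, xn = np.maximum(x, 0), np.minimum(x, 0)
        Wp, Wn = np.maximum(W, 0), np.minimum(W, 0)

        def f(W1, W2, x1, x2):
            # Activator part (x+ w+ + x- w-) or inhibitor part (x+ w- + x- w+).
            z = x1 @ W1 + x2 @ W2
            s = R_out / np.where(z == 0, eps, z)
            return x1 * (W1 @ s) + x2 * (W2 @ s)

        activator = f(Wp, Wn, xp, xn)
        if beta:  # only compute the inhibitor part if beta is non-zero
            inhibitor = f(Wn, Wp, xp, xn)
            return alpha * activator - beta * inhibitor
        return activator
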
Example 6
    def _gradient_reverse_mapping(
        self,
        Xs: OptionalList[Tensor],
        Ys: OptionalList[Tensor],
        reversed_Ys: OptionalList[Tensor],
        reverse_state: Dict,
    ):
        # Compute gradients only w.r.t. inputs that are not marked as stop points.
        mask = [x not in reverse_state["stop_mapping_at_tensors"] for x in Xs]
        masked_grad = ilayers.GradientWRT(len(Xs), mask=mask)
        return masked_grad(Xs + Ys + reversed_Ys)
Example 7
    def apply(self, Xs, Ys, Rs, reverse_state):
        grad = ilayers.GradientWRT(len(Xs))

        # Get activations.
        Zs = kutils.apply(self._layer_wo_act, Xs)
        # Divide incoming relevance by the activations.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Propagate the relevance to input neurons
        # using the gradient.
        tmp = iutils.to_list(grad(Xs + Zs + tmp))
        # Re-weight relevance with the input values.
        return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
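
This is the standard gradient trick for the basic LRP-Z rule: divide the incoming relevance by the pre-activations, backpropagate it with the gradient (which for a linear layer multiplies by the weights), and re-weight with the inputs. A NumPy reference for a dense layer, bias omitted and names illustrative:

    import numpy as np

    def lrp_z_dense(x, W, R_out, eps=1e-12):
        z = x @ W                                # Zs
        s = R_out / np.where(z == 0, eps, z)     # SafeDivide
        c = W @ s                                # gradient step: dZ/dx weighted by s
        return x * c                             # re-weight with the inputs
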
Example 8
    def apply(self, Xs, _Ys, reversed_Ys, _reverse_state: Dict):
        # Reapply the prepared layers.
        act_Xs = kutils.apply(self._filter_layer, Xs)
        act_Ys = kutils.apply(self._act_layer, act_Xs)
        pattern_Ys = kutils.apply(self._pattern_layer, Xs)

        # Layers that apply the backward pass.
        grad_act = ilayers.GradientWRT(len(act_Xs))
        grad_pattern = ilayers.GradientWRT(len(Xs))

        # First step: propagate through the activation layer.
        # Workaround for linear activations.
        linear_activations = [None, keras.activations.get("linear")]
        if self._act_layer.activation in linear_activations:
            tmp = reversed_Ys
        else:
            # The gradient misbehaves for linear activations, hence the shortcut above.
            tmp = iutils.to_list(grad_act(act_Xs + act_Ys + reversed_Ys))

        # Second step: propagate through the pattern layer.
        return grad_pattern(Xs + pattern_Ys + tmp)
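
To make the two-step backward pass concrete for a dense layer with a ReLU: the signal first passes through the gradient of the activation, then through the gradient of the pattern layer, which projects with that layer's kernel instead of the forward weights. In the sketch below `P` stands for whatever kernel `self._pattern_layer` holds (the learned patterns for PatternNet, or weights combined with patterns for PatternAttribution); this is an assumption about the prepared layers, not something stated in the snippet:

    import numpy as np

    def pattern_backward_dense_relu(x, W, b, P, grad_out):
        pre_act = x @ W + b                # output of the filter layer (act_Xs)
        # Step 1: gradient of the ReLU, weighted by the incoming signal.
        tmp = grad_out * (pre_act > 0)
        # Step 2: gradient of the pattern layer w.r.t. the input,
        # i.e. a projection with its kernel P.
        return P @ tmp
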
Example 9
    def apply(self, Xs, Ys, Rs, reverse_state):
        grad = ilayers.GradientWRT(len(Xs))
        # Create dummy forward path to take the derivative below.
        Ys = kutils.apply(self._layer_wo_act_b, Xs)

        # Compute the sum of the weights.
        ones = ilayers.OnesLike()(Xs)
        Zs = iutils.to_list(self._layer_wo_act_b(ones))
        # Weight the incoming relevance.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Redistribute the relevances along the gradient.
        tmp = iutils.to_list(grad(Xs + Ys + tmp))
        return tmp
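
This matches the weight-based (WSquare/Flat) family of rules: relevance is redistributed in proportion to the kernel of `self._layer_wo_act_b`, normalized by its per-output sums, and the input values themselves do not enter. A NumPy sketch for a dense layer, where `W_mod` is an assumption about what that prepared layer holds (for example squared weights, or all ones):

    import numpy as np

    def lrp_weight_based_dense(W_mod, R_out, eps=1e-12):
        z = W_mod.sum(axis=0)                    # Zs from the ones input
        s = R_out / np.where(z == 0, eps, z)     # weight the incoming relevance
        return W_mod @ s                         # gradient step; no input re-weighting
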
Example 10
    def apply(self, Xs, Ys, Rs, reverse_state):
        # The output of the pooling operation at each location is the sum of
        # its inputs. The forward messages, i.e. the inputs to each pooling
        # window, must be known in this case. The gradient is 1 for every
        # output-to-input connection, which corresponds to the "weights" of
        # the layer. It is therefore sufficient to reweight the relevances
        # and apply GradientWRT.
        grad = ilayers.GradientWRT(len(Xs))
        # Get activations.
        Zs = kutils.apply(self._layer_wo_act, Xs)
        # Divide incoming relevance by the activations.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]

        # Propagate the relevance to input neurons
        # using the gradient.
        tmp = iutils.to_list(grad(Xs + Zs + tmp))
        # Re-weight relevance with the input values.
        return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
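
Because a sum (or average) pooling output is a linear combination of its inputs, the divide-propagate-reweight pattern redistributes relevance proportionally to each input's share of its pooling window. A tiny NumPy example for non-overlapping 1-D sum pooling (illustrative, not library code):

    import numpy as np

    def lrp_sum_pool_1d(x, R_out, pool=2, eps=1e-12):
        xw = x.reshape(-1, pool)                 # pooling windows
        z = xw.sum(axis=1)                       # Zs: forward outputs
        s = R_out / np.where(z == 0, eps, z)     # SafeDivide
        return (xw * s[:, None]).reshape(-1)     # each input gets its share

    # Example: x = [1, 3, 2, 2] with R_out = [4, 8] yields [1, 3, 4, 4].
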
Example 11
    def apply(self, Xs, Ys, Rs, reverse_state):
        grad = ilayers.GradientWRT(len(Xs))

        #TODO: assert all inputs are positive, instead of only keeping the positives.
        #keep_positives = keras.layers.Lambda(lambda x: x * K.cast(K.greater(x,0), K.floatx()))
        #Xs = kutils.apply(keep_positives, Xs)

        # Get activations.
        Zs = kutils.apply(self._layer_wo_act_b_positive, Xs)
        # Divide incoming relevance by the activations.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Propagate the relevance to input neurons
        # using the gradient.
        tmp = iutils.to_list(grad(Xs + Zs + tmp))
        # Re-weight relevance with the input values.
        return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
Example 12
    def apply(self, Xs, Ys, Rs, reverse_state):
        grad = ilayers.GradientWRT(len(Xs))
        # The epsilon rule aligns epsilon with the (extended) sign: 0 is considered to be positive
        prepare_div = keras.layers.Lambda(lambda x: x + (K.cast(
            K.greater_equal(x, 0), K.floatx()) * 2 - 1) * self._epsilon)

        # Get activations.
        Zs = kutils.apply(self._layer_wo_act, Xs)

        # Divide incoming relevance by the activations.
        tmp = [ilayers.Divide()([a, prepare_div(b)]) for a, b in zip(Rs, Zs)]
        # Propagate the relevance to input neurons
        # using the gradient.
        tmp = iutils.to_list(grad(Xs + Zs + tmp))
        # Re-weight relevance with the input values.
        return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
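
The only difference from the plain Z rule (Example 7) is the stabilized denominator: epsilon is added with the sign of the pre-activation, and zero counts as positive. In NumPy terms for a dense layer, bias omitted and names illustrative:

    import numpy as np

    def lrp_epsilon_dense(x, W, R_out, epsilon=1e-7):
        z = x @ W
        # Extended sign: zero is treated as positive.
        z_stab = z + np.where(z >= 0, 1.0, -1.0) * epsilon
        s = R_out / z_stab
        return x * (W @ s)
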
Example 13
    def _default_reverse_mapping(self, Xs, Ys, reversed_Ys, reverse_state):
        default_return_layers = [keras.layers.Activation]  # TODO: extend
        if (len(Xs) == len(Ys) and
                isinstance(reverse_state['layer'], (keras.layers.Activation,)) and
                all([K.int_shape(x) == K.int_shape(y) for x, y in zip(Xs, Ys)])):
            # Expect Xs and Ys to have the same shapes.
            # There is no mixing of relevances as there is no kernel,
            # therefore we pass them on as they are.
            return reversed_Ys
        else:
            # This branch covers:
            # MaxPooling
            # AveragePooling
            # Max
            # Flatten
            # Reshape
            # Concatenate
            # Cropping
            return ilayers.GradientWRT(len(Xs))(Xs + Ys + reversed_Ys)
Example 14
def RectGradReverseReLULayer(Xs, Ys, reversed_Ys, reverse_state):
    from innvestigate import layers as ilayers

    # NOTE: `percentile` was a free variable in the original snippet;
    # a default is assumed here so the function is self-contained.
    percentile = 98

    def rectgrad(inputs):
        def threshold(x, q):
            # Per-sample q-th percentile of the importance scores.
            if len(x.shape.as_list()) > 3:
                thresh = tf.contrib.distributions.percentile(
                    x, q, axis=[1, 2, 3], keep_dims=True)
            else:
                thresh = tf.contrib.distributions.percentile(
                    x, q, axis=1, keep_dims=True)
            return thresh

        y, grad = inputs
        # Importance of a unit: forward activation times incoming gradient.
        activation_grad = y * grad
        thresh = threshold(activation_grad, percentile)
        # Keep the gradient only where the importance exceeds the threshold.
        return tf.where(thresh < activation_grad, grad,
                        tf.zeros_like(grad))

    rectgrad_layer = keras.layers.Lambda(rectgrad)
    y_rev = rectgrad_layer([Ys[0], reversed_Ys[0]])
    return ilayers.GradientWRT(len(Xs))(Xs + Ys + [y_rev])
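
The gating step can be checked against a plain NumPy version: the importance score is the forward activation times the incoming gradient, and only gradients whose importance exceeds the per-sample q-th percentile survive. A sketch assuming batched arrays with a leading batch axis (illustrative names and default):

    import numpy as np

    def rectgrad_gate(y, grad, q=98):
        importance = y * grad
        # Per-sample percentile over all non-batch axes.
        axes = tuple(range(1, importance.ndim))
        thresh = np.percentile(importance, q, axis=axes, keepdims=True)
        return np.where(importance > thresh, grad, np.zeros_like(grad))
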
Example 15
    def apply(self, Xs, Ys, Rs, reverse_state):
        grad = ilayers.GradientWRT(len(Xs))
        to_low = keras.layers.Lambda(lambda x: x * 0 + self._low)
        to_high = keras.layers.Lambda(lambda x: x * 0 + self._high)

        def f(Xs):
            low = [to_low(x) for x in Xs]
            high = [to_high(x) for x in Xs]

            A = kutils.apply(self._layer_wo_act, Xs)
            B = kutils.apply(self._layer_wo_act_positive, low)
            C = kutils.apply(self._layer_wo_act_negative, high)
            return [
                keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
                for a, b, c in zip(A, B, C)
            ]

        # Get values for the division.
        Zs = f(Xs)
        # Divide the relevances by the value.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Distribute along the gradient.
        tmp = iutils.to_list(grad(Xs + Zs + tmp))
        return tmp
Example 16
    def _default_reverse_mapping(self, Xs, Ys, reversed_Ys, reverse_state):
        return ilayers.GradientWRT(len(Xs))(Xs + Ys + reversed_Ys)