def apply(self, Xs, Ys, reversed_Ys, reverse_state: Dict):
    # Apply relus conditioned on backpropagated values.
    reversed_Ys = kutils.apply(self._activation, reversed_Ys)

    # Apply gradient of forward pass without relus.
    Ys_wo_relu = kutils.apply(self._layer_wo_relu, Xs)
    return ilayers.GradientWRT(len(Xs))(Xs + Ys_wo_relu + reversed_Ys)
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    to_low = keras.layers.Lambda(lambda x: x * 0 + self._low)
    to_high = keras.layers.Lambda(lambda x: x * 0 + self._high)

    low = [to_low(x) for x in Xs]
    high = [to_high(x) for x in Xs]

    # Get values for the division.
    A = kutils.apply(self._layer_wo_act, Xs)
    B = kutils.apply(self._layer_wo_act_positive, low)
    C = kutils.apply(self._layer_wo_act_negative, high)
    Zs = [
        keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
        for a, b, c in zip(A, B, C)
    ]

    # Divide relevances with the value.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]

    # Distribute along the gradient.
    tmpA = iutils.to_list(grad(Xs + A + tmp))
    tmpB = iutils.to_list(grad(low + B + tmp))
    tmpC = iutils.to_list(grad(high + C + tmp))

    tmpA = [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmpA)]
    tmpB = [keras.layers.Multiply()([a, b]) for a, b in zip(low, tmpB)]
    tmpC = [keras.layers.Multiply()([a, b]) for a, b in zip(high, tmpC)]

    tmp = [
        keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
        for a, b, c in zip(tmpA, tmpB, tmpC)
    ]
    return tmp
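# Hedged sketch: the Z^B (bounded-input) rule above, written out with NumPy
# for a single dense layer so the Subtract/Add/Multiply terms are easy to
# follow. The bias-free dense layer and all names here are illustrative
# assumptions, not the library's implementation.
import numpy as np

def zb_rule_dense(x, W, R, low, high, eps=1e-9):
    """x: (n_in,), W: (n_in, n_out), R: (n_out,), low/high: input bounds."""
    Wp, Wn = np.maximum(W, 0), np.minimum(W, 0)
    l, h = np.full_like(x, low), np.full_like(x, high)
    # Denominator: A - (B + C) in the code above.
    z = x @ W - (l @ Wp + h @ Wn)
    s = R / np.where(z == 0, eps, z)          # SafeDivide stand-in
    # The gradients of z w.r.t. x, low and high are W, Wp and Wn;
    # re-weight each with its input and recombine.
    return x * (W @ s) - (l * (Wp @ s) + h * (Wn @ s))

# Toy usage: the redistributed relevance sums to the incoming relevance.
x = np.array([0.2, -0.5, 0.9])
W = np.array([[1.0, -2.0], [0.5, 0.5], [-1.5, 1.0]])
R = np.array([1.0, 0.5])
print(zb_rule_dense(x, W, R, low=-1.0, high=1.0).sum(), R.sum())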
def GuidedBackpropReverseReLULayer(Xs, Ys, reversed_Ys, reverse_state: Dict):
    activation = keras.layers.Activation("relu")
    # Apply relus conditioned on backpropagated values.
    reversed_Ys = kutils.apply(activation, reversed_Ys)

    # Apply gradient of forward pass.
    return ilayers.GradientWRT(len(Xs))(Xs + Ys + reversed_Ys)
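# Hedged sketch: the difference between the DeconvNet mapping further above
# and the Guided Backprop mapping directly above, reduced to a single ReLU
# unit with NumPy masks. The function names are illustrative, not library API.
import numpy as np

def deconvnet_relu_backward(x, grad_out):
    # DeconvNet: rectify only the backpropagated values;
    # the forward ReLU mask (x > 0) is ignored.
    return np.maximum(grad_out, 0)

def guided_backprop_relu_backward(x, grad_out):
    # Guided Backprop: rectify the backpropagated values AND
    # apply the forward ReLU mask, i.e. both conditions must hold.
    return np.maximum(grad_out, 0) * (x > 0)

x = np.array([-1.0, 2.0, 3.0])
g = np.array([0.5, -0.7, 0.9])
print(deconvnet_relu_backward(x, g))        # [0.5, 0.0, 0.9]
print(guided_backprop_relu_backward(x, g))  # [0.0, 0.0, 0.9]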
def apply(self, Xs, Ys, Rs, reverse_state):
    # This method is correct, but wasteful.
    grad = ilayers.GradientWRT(len(Xs))
    times_alpha0 = tensorflow.keras.layers.Lambda(
        lambda x: x * self._alpha[0])
    times_alpha1 = tensorflow.keras.layers.Lambda(
        lambda x: x * self._alpha[1])
    times_beta0 = tensorflow.keras.layers.Lambda(
        lambda x: x * self._beta[0])
    times_beta1 = tensorflow.keras.layers.Lambda(
        lambda x: x * self._beta[1])
    keep_positives = tensorflow.keras.layers.Lambda(
        lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
    keep_negatives = tensorflow.keras.layers.Lambda(
        lambda x: x * K.cast(K.less(x, 0), K.floatx()))

    def f(layer, X):
        Zs = kutils.apply(layer, X)
        # Divide incoming relevance by the activations.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Propagate the relevance to the input neurons
        # using the gradient.
        tmp = iutils.to_list(grad(X + Zs + tmp))
        # Re-weight relevance with the input values.
        tmp = [
            tensorflow.keras.layers.Multiply()([a, b])
            for a, b in zip(X, tmp)
        ]
        return tmp

    # Distinguish positive and negative inputs.
    Xs_pos = kutils.apply(keep_positives, Xs)
    Xs_neg = kutils.apply(keep_negatives, Xs)

    # xpos * wpos
    r_pp = f(self._layer_wo_act_positive, Xs_pos)
    # xneg * wneg
    r_nn = f(self._layer_wo_act_negative, Xs_neg)
    # a0 * r_pp + a1 * r_nn
    r_pos = [
        tensorflow.keras.layers.Add()([times_alpha0(pp), times_alpha1(nn)])
        for pp, nn in zip(r_pp, r_nn)
    ]

    # xpos * wneg
    r_pn = f(self._layer_wo_act_negative, Xs_pos)
    # xneg * wpos
    r_np = f(self._layer_wo_act_positive, Xs_neg)
    # b0 * r_pn + b1 * r_np
    r_neg = [
        tensorflow.keras.layers.Add()([times_beta0(pn), times_beta1(np)])
        for pn, np in zip(r_pn, r_np)
    ]

    return [
        tensorflow.keras.layers.Subtract()([a, b])
        for a, b in zip(r_pos, r_neg)
    ]
def apply(self, Xs, Ys, Rs, reverse_state):
    # This method is correct, but wasteful.
    grad = ilayers.GradientWRT(len(Xs))
    times_alpha = tensorflow.keras.layers.Lambda(lambda x: x * self._alpha)
    times_beta = tensorflow.keras.layers.Lambda(lambda x: x * self._beta)
    keep_positives = tensorflow.keras.layers.Lambda(
        lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
    keep_negatives = tensorflow.keras.layers.Lambda(
        lambda x: x * K.cast(K.less(x, 0), K.floatx()))

    def f(layer1, layer2, X1, X2):
        # Get activations of the full positive or negative part.
        Z1 = kutils.apply(layer1, X1)
        Z2 = kutils.apply(layer2, X2)
        Zs = [
            tensorflow.keras.layers.Add()([a, b])
            for a, b in zip(Z1, Z2)
        ]

        # Divide incoming relevance by the activations.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]

        # Propagate the relevance to the input neurons
        # using the gradient.
        tmp1 = iutils.to_list(grad(X1 + Z1 + tmp))
        tmp2 = iutils.to_list(grad(X2 + Z2 + tmp))

        # Re-weight relevance with the input values.
        tmp1 = [
            tensorflow.keras.layers.Multiply()([a, b])
            for a, b in zip(X1, tmp1)
        ]
        tmp2 = [
            tensorflow.keras.layers.Multiply()([a, b])
            for a, b in zip(X2, tmp2)
        ]

        # Combine and return.
        return [
            tensorflow.keras.layers.Add()([a, b])
            for a, b in zip(tmp1, tmp2)
        ]

    # Distinguish positive and negative inputs.
    Xs_pos = kutils.apply(keep_positives, Xs)
    Xs_neg = kutils.apply(keep_negatives, Xs)

    # xpos*wpos + xneg*wneg
    activator_relevances = f(self._layer_wo_act_positive,
                             self._layer_wo_act_negative, Xs_pos, Xs_neg)

    if self._beta:
        # Only compute beta-weighted contributions if beta is not zero.
        # xpos*wneg + xneg*wpos
        inhibitor_relevances = f(self._layer_wo_act_negative,
                                 self._layer_wo_act_positive, Xs_pos, Xs_neg)
        return [
            tensorflow.keras.layers.Subtract()(
                [times_alpha(a), times_beta(b)])
            for a, b in zip(activator_relevances, inhibitor_relevances)
        ]
    else:
        return activator_relevances
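# Hedged sketch: the alpha-beta rule implemented by the two methods above, for
# one dense layer in NumPy. Per-connection contributions x_i * w_ij are split
# into activating (positive) and inhibiting (negative) parts, normalized per
# output neuron, and weighted with alpha and beta. The bias-free layer and all
# names are illustrative assumptions.
import numpy as np

def alpha_beta_rule_dense(x, W, R, alpha=2.0, beta=1.0, eps=1e-9):
    """x: (n_in,), W: (n_in, n_out), R: (n_out,). Typically alpha - beta = 1."""
    contrib = x[:, None] * W              # per-connection contributions
    pos = np.maximum(contrib, 0)          # xpos*wpos + xneg*wneg
    neg = np.minimum(contrib, 0)          # xpos*wneg + xneg*wpos
    z_pos = pos.sum(axis=0) + eps
    z_neg = neg.sum(axis=0) - eps
    activator = pos @ (R / z_pos)
    inhibitor = neg @ (R / z_neg)
    return alpha * activator - beta * inhibitor

x = np.array([1.0, -2.0, 0.5])
W = np.array([[1.0, -1.0], [0.5, 2.0], [-2.0, 0.3]])
R = np.array([1.0, 1.0])
# With alpha=1, beta=0 this reduces to the Z+ rule; relevance sums are
# approximately conserved whenever alpha - beta = 1.
print(alpha_beta_rule_dense(x, W, R, alpha=1.0, beta=0.0).sum(), R.sum())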
def _gradient_reverse_mapping(
    self,
    Xs: OptionalList[Tensor],
    Ys: OptionalList[Tensor],
    reversed_Ys: OptionalList[Tensor],
    reverse_state: Dict,
):
    mask = [x not in reverse_state["stop_mapping_at_tensors"] for x in Xs]
    masked_grad = ilayers.GradientWRT(len(Xs), mask=mask)
    return masked_grad(Xs + Ys + reversed_Ys)
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # Get activations.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
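# Hedged sketch: the basic Z rule computed by `apply` above, reduced to a
# dense layer in NumPy. The four code comments above map directly onto the
# four lines of the function body. All names are illustrative assumptions.
import numpy as np

def z_rule_dense(x, W, b, R, eps=1e-9):
    """x: (n_in,), W: (n_in, n_out), b: (n_out,), R: (n_out,)."""
    z = x @ W + b                        # Get activations.
    s = R / np.where(z == 0, eps, z)     # Divide relevance by the activations.
    c = W @ s                            # Gradient w.r.t. the input.
    return x * c                         # Re-weight with the input values.

x = np.array([1.0, 2.0, -1.0])
W = np.array([[0.5, -1.0], [1.0, 1.0], [2.0, -0.5]])
R = np.array([0.3, 0.7])
# With a zero bias the redistributed relevance sums to the incoming relevance.
print(z_rule_dense(x, W, np.zeros(2), R).sum(), R.sum())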
def apply(self, Xs, _Ys, reversed_Ys, _reverse_state: Dict):
    # Reapply the prepared layers.
    act_Xs = kutils.apply(self._filter_layer, Xs)
    act_Ys = kutils.apply(self._act_layer, act_Xs)
    pattern_Ys = kutils.apply(self._pattern_layer, Xs)

    # Layers that apply the backward pass.
    grad_act = ilayers.GradientWRT(len(act_Xs))
    grad_pattern = ilayers.GradientWRT(len(Xs))

    # First step: propagate through the activation layer.
    # Workaround for linear activations: taking the gradient through a
    # linear activation behaves strangely, so pass the values through as is.
    linear_activations = [None, keras.activations.get("linear")]
    if self._act_layer.activation in linear_activations:
        tmp = reversed_Ys
    else:
        tmp = iutils.to_list(grad_act(act_Xs + act_Ys + reversed_Ys))

    # Second step: propagate through the pattern layer.
    return grad_pattern(Xs + pattern_Ys + tmp)
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # Create dummy forward path to take the derivative below.
    Ys = kutils.apply(self._layer_wo_act_b, Xs)

    # Compute the sum of the weights.
    ones = ilayers.OnesLike()(Xs)
    Zs = iutils.to_list(self._layer_wo_act_b(ones))
    # Weight the incoming relevance.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Redistribute the relevances along the gradient.
    tmp = iutils.to_list(grad(Xs + Ys + tmp))
    return tmp
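# Hedged sketch: the redistribution computed above, for a dense layer in
# NumPy, assuming `_layer_wo_act_b` carries squared weights as in the
# W-square rule (setting them to ones instead gives the flat rule). The
# function name and shapes are illustrative assumptions.
import numpy as np

def wsquare_rule_dense(W, R, eps=1e-9):
    """W: (n_in, n_out), R: (n_out,); note the input values are never used."""
    V = W ** 2
    z = V.sum(axis=0)                    # "sum of the weights" (layer applied to ones)
    s = R / np.where(z == 0, eps, z)     # weight the incoming relevance
    return V @ s                         # redistribute along the gradient

W = np.array([[0.5, -1.0], [1.0, 1.0], [2.0, -0.5]])
R = np.array([0.3, 0.7])
print(wsquare_rule_dense(W, R).sum(), R.sum())   # relevance is conserved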
def apply(self, Xs, Ys, Rs, reverse_state):
    # The output of the pooling operation at each location is the sum of its
    # inputs. The forward message must be known in this case, and it consists
    # of the inputs to each pooling window. The gradient is 1 for each
    # output-to-input connection, which corresponds to the "weights" of the
    # layer. It is therefore sufficient to re-weight the relevances and do a
    # gradient_wrt.
    grad = ilayers.GradientWRT(len(Xs))
    # Get activations.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
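# Hedged sketch: a worked example of the comment above. For one sum-pooling
# window the Z rule splits the window's relevance proportionally to the
# inputs because the per-connection "weight" is 1. Toy numbers only.
import numpy as np

window = np.array([1.0, 3.0])     # inputs of one pooling window
z = window.sum()                  # pooled output (the "activation"), 4.0
R_out = 8.0                       # relevance arriving at that output
R_in = window * (R_out / z)       # gradient is 1 per connection
print(R_in)                       # [2.0, 6.0]: proportional split, sum preserved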
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # TODO: assert all inputs are positive, instead of only keeping the positives.
    # keep_positives = keras.layers.Lambda(
    #     lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
    # Xs = kutils.apply(keep_positives, Xs)

    # Get activations.
    Zs = kutils.apply(self._layer_wo_act_b_positive, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # The epsilon rule aligns epsilon with the (extended) sign:
    # 0 is considered to be positive.
    prepare_div = keras.layers.Lambda(
        lambda x: x + (K.cast(K.greater_equal(x, 0), K.floatx()) * 2 - 1) * self._epsilon)

    # Get activations.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.Divide()([a, prepare_div(b)]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)]
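# Hedged sketch: the sign-aligned epsilon stabiliser used above, in NumPy for
# a dense layer. Zero is treated as positive, so the denominator is always
# pushed away from zero. Names and the bias handling are illustrative.
import numpy as np

def epsilon_rule_dense(x, W, b, R, epsilon=1e-2):
    """x: (n_in,), W: (n_in, n_out), b: (n_out,), R: (n_out,)."""
    z = x @ W + b
    # (z >= 0) * 2 - 1 is +1 for z >= 0 and -1 otherwise.
    z_stab = z + ((z >= 0) * 2 - 1) * epsilon
    s = R / z_stab
    return x * (W @ s)

x = np.array([1.0, 2.0, -1.0])
W = np.array([[0.5, -1.0], [1.0, 1.0], [2.0, -0.5]])
R = np.array([0.3, 0.7])
# Some relevance is absorbed by the stabiliser, so the sums differ slightly.
print(epsilon_rule_dense(x, W, np.zeros(2), R).sum(), R.sum())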
def _default_reverse_mapping(self, Xs, Ys, reversed_Ys, reverse_state):
    default_return_layers = [keras.layers.Activation]  # TODO: extend
    if (len(Xs) == len(Ys)
            and isinstance(reverse_state['layer'], (keras.layers.Activation,))
            and all([K.int_shape(x) == K.int_shape(y) for x, y in zip(Xs, Ys)])):
        # Expect Xs and Ys to have the same shapes.
        # There is no mixing of relevances as there is no kernel,
        # therefore we pass them on as they are.
        return reversed_Ys
    else:
        # This branch covers:
        # MaxPooling
        # AveragePooling
        # Max
        # Flatten
        # Reshape
        # Concatenate
        # Cropping
        return ilayers.GradientWRT(len(Xs))(Xs + Ys + reversed_Ys)
def RectGradReverseReLULayer(Xs, Ys, reversed_Ys, reverse_state):
    # Note: `percentile` is expected to be defined at module scope, and
    # tf.contrib.distributions.percentile requires TensorFlow 1.x.
    def rectgrad(inputs):
        def threshold(x, q):
            if len(x.shape.as_list()) > 3:
                thresh = tf.contrib.distributions.percentile(
                    x, q, axis=[1, 2, 3], keep_dims=True)
            else:
                thresh = tf.contrib.distributions.percentile(
                    x, q, axis=1, keep_dims=True)
            return thresh

        y, grad = inputs
        activation_grad = y * grad
        thresh = threshold(activation_grad, percentile)
        # Keep the gradient only where activation * gradient exceeds the threshold.
        return tf.where(thresh < activation_grad, grad, tf.zeros_like(grad))

    rectgrad_layer = keras.layers.Lambda(rectgrad)
    y_rev = rectgrad_layer([Ys[0], reversed_Ys[0]])

    from innvestigate import layers as ilayers
    return ilayers.GradientWRT(len(Xs))(Xs + Ys + [y_rev])
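# Hedged sketch: the RectGrad gating from the Lambda above, in NumPy. The
# backpropagated value is kept only where activation * gradient exceeds the
# q-th percentile of that product. `q` and the toy shapes are assumptions.
import numpy as np

def rectgrad_gate(y, grad, q=90):
    """y: forward activations, grad: backpropagated values (same shape)."""
    importance = y * grad
    thresh = np.percentile(importance, q)
    return np.where(importance > thresh, grad, 0.0)

y = np.array([0.1, 0.8, 0.0, 2.0, 0.5])
g = np.array([1.0, -0.5, 0.7, 0.9, 0.2])
print(rectgrad_gate(y, g, q=60))  # only the strongest activation*gradient entries survive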
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    to_low = keras.layers.Lambda(lambda x: x * 0 + self._low)
    to_high = keras.layers.Lambda(lambda x: x * 0 + self._high)

    def f(Xs):
        low = [to_low(x) for x in Xs]
        high = [to_high(x) for x in Xs]

        A = kutils.apply(self._layer_wo_act, Xs)
        B = kutils.apply(self._layer_wo_act_positive, low)
        C = kutils.apply(self._layer_wo_act_negative, high)
        return [
            keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
            for a, b, c in zip(A, B, C)
        ]

    # Get values for the division.
    Zs = f(Xs)
    # Divide relevances with the value.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Distribute along the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    return tmp
def _default_reverse_mapping(self, Xs, Ys, reversed_Ys, reverse_state):
    return ilayers.GradientWRT(len(Xs))(Xs + Ys + reversed_Ys)