def apply(self, Xs, Ys, Rs, reverse_state):
    """Bounded-input relevance rule: split the denominator into the
    regular forward pass minus the contributions of the lower/upper
    input bounds, then backpropagate each term along its own path.
    """
    grad = ilayers.GradientWRT(len(Xs))
    fill_low = keras.layers.Lambda(lambda x: x * 0 + self._low)
    fill_high = keras.layers.Lambda(lambda x: x * 0 + self._high)
    lows = [fill_low(x) for x in Xs]
    highs = [fill_high(x) for x in Xs]

    # Denominator terms: regular pass, positive weights on the lower
    # bound, negative weights on the upper bound.
    Za = kutils.apply(self._layer_wo_act, Xs)
    Zb = kutils.apply(self._layer_wo_act_positive, lows)
    Zc = kutils.apply(self._layer_wo_act_negative, highs)
    Zs = [
        keras.layers.Subtract()([za, keras.layers.Add()([zb, zc])])
        for za, zb, zc in zip(Za, Zb, Zc)
    ]

    # Normalize incoming relevance by the combined denominator.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]

    # Backpropagate the normalized relevance through each of the
    # three forward paths separately ...
    grads_x = iutils.to_list(grad(Xs + Za + ratios))
    grads_l = iutils.to_list(grad(lows + Zb + ratios))
    grads_h = iutils.to_list(grad(highs + Zc + ratios))

    # ... re-weight each path with its own inputs ...
    part_x = [keras.layers.Multiply()([x, g]) for x, g in zip(Xs, grads_x)]
    part_l = [keras.layers.Multiply()([x, g]) for x, g in zip(lows, grads_l)]
    part_h = [keras.layers.Multiply()([x, g]) for x, g in zip(highs, grads_h)]

    # ... and recombine: R_x - (R_low + R_high).
    return [
        keras.layers.Subtract()([px, keras.layers.Add()([pl, ph])])
        for px, pl, ph in zip(part_x, part_l, part_h)
    ]
def f(layer1, layer2, X1, X2):
    """Propagate relevance through a layer split into two partial
    layers (e.g. positive/negative parts) and recombine the result.

    NOTE: `Rs` and `grad` are free variables taken from the enclosing
    scope.
    """
    # Denominator: activations of both parts, summed.
    Z1 = kutils.apply(layer1, X1)
    Z2 = kutils.apply(layer2, X2)
    Zs = [
        tensorflow.keras.layers.Add()([z1, z2])
        for z1, z2 in zip(Z1, Z2)
    ]
    # Normalize the incoming relevance by the summed activations.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Backpropagate through each branch via the gradient.
    back1 = iutils.to_list(grad(X1 + Z1 + ratios))
    back2 = iutils.to_list(grad(X2 + Z2 + ratios))
    # Re-weight with the branch inputs.
    weighted1 = [
        tensorflow.keras.layers.Multiply()([x, g])
        for x, g in zip(X1, back1)
    ]
    weighted2 = [
        tensorflow.keras.layers.Multiply()([x, g])
        for x, g in zip(X2, back2)
    ]
    # Combine both branches into the final relevance.
    return [
        tensorflow.keras.layers.Add()([a, b])
        for a, b in zip(weighted1, weighted2)
    ]
def f(layer, X):
    """Z-rule relevance step for a single layer.

    NOTE: `Rs` and `grad` are free variables taken from the enclosing
    scope.
    """
    # Denominator: the layer's activations.
    Zs = kutils.apply(layer, X)
    # Normalize the incoming relevance by those activations.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Backpropagate to the inputs via the gradient.
    back = iutils.to_list(grad(X + Zs + ratios))
    # Re-weight with the input values.
    return [keras.layers.Multiply()([x, g]) for x, g in zip(X, back)]
def apply(self, Xs, Ys, Rs, reverse_state):
    """Plain z-rule: normalize relevance by the (activation-free)
    forward pass, distribute it via the gradient, and re-weight with
    the inputs.
    """
    grad = ilayers.GradientWRT(len(Xs))
    # Forward pass through the layer without its activation function.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Normalize the incoming relevance by the activations.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Gradient carries the normalized relevance back onto the inputs.
    back = iutils.to_list(grad(Xs + Zs + ratios))
    # Re-weight with the input values.
    return [keras.layers.Multiply()([x, g]) for x, g in zip(Xs, back)]
def apply(self, Xs, Ys, Rs, _reverse_state: Dict):
    """Reverse (LRP) rule for a batch-normalization layer.

    Reweights the incoming relevance Rs as

            x * (y - beta)     R
    Rin = ----------------- * ---
              x - mu           y

    Batch norm can be viewed as three distinct steps — subtract the
    mean, multiply by the scale, add beta. The multiplicative scaling
    has no effect on LRP (it acts like a linear activation), so only
    the subtraction and the beta shift appear in the formula.

    Expects exactly one input tensor in Xs; raises ValueError
    otherwise.
    """
    input_shape = [K.int_shape(x) for x in Xs]
    if len(input_shape) != 1:
        # extend below lambda layers towards multiple parameters.
        raise ValueError(
            "BatchNormalizationReverseLayer expects Xs with len(Xs) = 1, but was len(Xs) = {}".format(  # noqa
                len(Xs)
            )
        )
    input_shape = input_shape[0]

    # Prepare a broadcasting shape for the layer parameters: ones
    # everywhere except the normalized axis (and -1 for the batch
    # dimension) so mean/beta align with the input tensor.
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self._axis] = input_shape[self._axis]
    broadcast_shape[0] = -1

    # Element-wise building blocks for the formula above.
    minus_mu = keras.layers.Lambda(
        lambda x: x - K.reshape(self._mean, broadcast_shape)
    )
    minus_beta = keras.layers.Lambda(
        lambda x: x - K.reshape(self._beta, broadcast_shape)
    )
    # Nudge the denominator away from zero by +/- epsilon, keeping
    # its sign, so the division below stays finite.
    prepare_div = keras.layers.Lambda(
        lambda x: x
        + (K.cast(K.greater_equal(x, 0), K.floatx()) * 2 - 1) * K.epsilon()
    )

    x_minus_mu = kutils.apply(minus_mu, Xs)
    # Only subtract beta if the layer actually applies centering.
    if self._center:
        y_minus_beta = kutils.apply(minus_beta, Ys)
    else:
        y_minus_beta = Ys

    # Numerator: x * (y - beta) * R; denominator: (x - mu) * y.
    numerator = [
        keras.layers.Multiply()([x, ymb, r])
        for x, ymb, r in zip(Xs, y_minus_beta, Rs)
    ]
    denominator = [
        keras.layers.Multiply()([xmm, y])
        for xmm, y in zip(x_minus_mu, Ys)
    ]

    return [
        ilayers.SafeDivide()([n, prepare_div(d)])
        for n, d in zip(numerator, denominator)
    ]
def apply(self, Xs, Ys, Rs, reverse_state):
    """Flat-style rule: the denominator is the layer applied to an
    all-ones input, i.e. the per-neuron weight sum; relevance is then
    redistributed along the gradient of a dummy forward pass.
    """
    grad = ilayers.GradientWRT(len(Xs))
    # Dummy forward path whose outputs anchor the derivative below.
    Ys = kutils.apply(self._layer_wo_act_b, Xs)
    # Weight sums: run the layer on a ones-like input.
    ones = ilayers.OnesLike()(Xs)
    Zs = iutils.to_list(self._layer_wo_act_b(ones))
    # Normalize the incoming relevance by the weight sums.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Redistribute along the gradient of the dummy path.
    return iutils.to_list(grad(Xs + Ys + ratios))
def apply(self, Xs, Ys, Rs, reverse_state):
    """Relevance rule for sum-pooling-like layers.

    Each pooling output is the sum of its inputs, so every
    output-to-input connection has gradient 1 — those gradients act
    as the "weights" of the layer. It is therefore sufficient to
    normalize the relevances and push them back via gradient-wrt.
    """
    grad = ilayers.GradientWRT(len(Xs))
    # Activations of the layer without its activation function.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Normalize the incoming relevance by the activations.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Push the normalized relevance back onto the inputs.
    back = iutils.to_list(grad(Xs + Zs + ratios))
    # Re-weight with the input values.
    return [keras.layers.Multiply()([x, g]) for x, g in zip(Xs, back)]
def apply(self, Xs, Ys, Rs, reverse_state):
    """Z+ rule: like the plain z-rule but the denominator uses only
    the positive weights (and biases) of the layer.

    TODO: assert that all inputs are positive, instead of only
    keeping the positive parts.
    """
    grad = ilayers.GradientWRT(len(Xs))
    # Denominator: forward pass using positive weights/biases only.
    Zs = kutils.apply(self._layer_wo_act_b_positive, Xs)
    # Normalize the incoming relevance by those activations.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Propagate back onto the inputs via the gradient.
    back = iutils.to_list(grad(Xs + Zs + ratios))
    # Re-weight with the input values.
    return [keras.layers.Multiply()([x, g]) for x, g in zip(Xs, back)]
def apply(self, Xs, Ys, Rs, reverse_state):
    """Bounded-input rule (single gradient path): the denominator is
    the regular forward pass minus the bound contributions,
    Z = Wx - (W+ * low + W- * high).
    """
    grad = ilayers.GradientWRT(len(Xs))
    fill_low = keras.layers.Lambda(lambda x: x * 0 + self._low)
    fill_high = keras.layers.Lambda(lambda x: x * 0 + self._high)

    def bounded_forward(inputs):
        # Build the combined denominator for inputs bounded in
        # [low, high].
        lows = [fill_low(x) for x in inputs]
        highs = [fill_high(x) for x in inputs]
        za = kutils.apply(self._layer_wo_act, inputs)
        zb = kutils.apply(self._layer_wo_act_positive, lows)
        zc = kutils.apply(self._layer_wo_act_negative, highs)
        return [
            keras.layers.Subtract()([a, keras.layers.Add()([b, c])])
            for a, b, c in zip(za, zb, zc)
        ]

    # Values for the division.
    Zs = bounded_forward(Xs)
    # Normalize the incoming relevance.
    ratios = [ilayers.SafeDivide()([r, z]) for r, z in zip(Rs, Zs)]
    # Distribute along the gradient.
    return iutils.to_list(grad(Xs + Zs + ratios))
def norm(x, count):
    """Divide x by count element-wise, guarded against division by
    zero (SafeDivide with factor=1)."""
    divider = ilayers.SafeDivide(factor=1)
    return divider([x, count])