def __init__(self, model, epsilon=1e-1, *args, **kwargs):
    self._model_checks = [
        # TODO: Check for non-linear output in general.
        {
            "check": lambda layer: kchecks.contains_activation(
                layer, activation="softmax"),
            "type": "exception",
            "message": "Model should not contain a softmax.",
        },
        {
            "check": lambda layer: not kchecks.only_relu_activation(layer),
            "type": "warning",
            "message": ("LRPSequentialPresetB is not advised for "
                        "networks with non-ReLU activations."),
        },
    ]

    class EpsilonProxyRule(rrule.EpsilonRule):
        def __init__(self, *args, **kwargs):
            super(EpsilonProxyRule, self).__init__(
                *args, epsilon=epsilon, bias=True, **kwargs)

    conditional_rules = [
        (kchecks.is_dense_layer, EpsilonProxyRule),
        (kchecks.is_conv_layer, rrule.Alpha2Beta1Rule),
    ]

    super(LRPSequentialPresetB, self).__init__(
        model, *args, rule=conditional_rules, **kwargs)
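# A minimal usage sketch of the preset (an assumption, not part of the
# source above): dense layers get the epsilon rule, convolutions the
# alpha-2/beta-1 rule. Assumes `model` is a Keras classifier whose final
# softmax has been removed, e.g. via innvestigate.model_wo_softmax.
analyzer = LRPSequentialPresetB(model, epsilon=1e-1)
relevance = analyzer.analyze(x)  # x: a batch with the model's input shape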
def _create_analysis(self, *args, **kwargs):
    self._add_conditional_reverse_mapping(
        lambda layer: kchecks.contains_activation(layer, "relu"),
        GuidedBackpropReverseReLULayer,
        name="guided_backprop_reverse_relu_layer",
    )
    return super()._create_analysis(*args, **kwargs)
def _create_analysis(self, *args, **kwargs):
    self._add_conditional_reverse_mapping(
        lambda layer: kchecks.contains_activation(layer, "relu"),
        DeconvnetReverseReLULayer,
        name="deconvnet_reverse_relu_layer",
    )
    return super()._create_analysis(*args, **kwargs)
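# Illustrative sketch (standalone NumPy; an assumption, not the
# library's internals): how the two reverse mappings above treat a ReLU
# during the backward pass, compared to the plain gradient. `x` is the
# ReLU input, `g` the incoming backward signal.
import numpy as np

x = np.array([-1.0, 0.5, 2.0, -0.3])
g = np.array([0.7, -0.2, 1.5, 0.4])

plain_gradient = g * (x > 0)             # backprop: mask by forward activation
deconvnet = g * (g > 0)                  # deconvnet: mask by backward signal only
guided_backprop = g * (x > 0) * (g > 0)  # guided backprop: both masks combined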
def _create_analysis(self, *args, **kwargs):
    self._add_conditional_reverse_mapping(
        lambda layer: kchecks.contains_activation(layer, "relu"),
        get_rect_grad_reverse_rule_layer(self._percentile),
        name="rect_grad_reverse_relu_layer",
    )
    return super()._create_analysis(*args, **kwargs)
def _add_model_softmax_check(self) -> None:
    """Add a check that prevents models from containing a softmax."""
    contains_softmax: LayerCheck = lambda layer: kchecks.contains_activation(
        layer, activation="softmax")

    self._add_model_check(
        check=contains_softmax,
        message="This analysis method does not support softmax layers.",
        check_type="exception",
    )
def pre_softmax_tensors(Xs: Tensor,
                        should_find_softmax: bool = True) -> List[Tensor]:
    """Finds the tensors that precede a potential softmax."""
    softmax_found = False

    Xs = iutils.to_list(Xs)
    ret = []
    for x in Xs:
        layer, node_index, _tensor_index = x._keras_history
        if kchecks.contains_activation(layer, activation="softmax"):
            softmax_found = True
            if isinstance(layer, keras.layers.Activation):
                ret.append(layer.get_input_at(node_index))
            else:
                layer_wo_act = copy_layer_wo_activation(layer)
                ret.append(layer_wo_act(layer.get_input_at(node_index)))
    if should_find_softmax and not softmax_found:
        raise Exception("No softmax found.")

    return ret
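# A plausible usage sketch (assumption, for illustration): build a
# logits-only copy of a classifier by rewiring its outputs to the
# pre-softmax tensors found above.
logits = pre_softmax_tensors(model.outputs)
model_wo_softmax = keras.models.Model(
    inputs=model.inputs, outputs=logits, name=model.name + "_wo_softmax")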
def copy_layer_wo_activation(layer: Layer,
                             keep_bias: bool = True,
                             name_template: Optional[str] = None,
                             weights: Optional[Union[List[np.ndarray],
                                                     List[Tensor]]] = None,
                             reuse_symbolic_tensors: bool = True,
                             **kwargs) -> Layer:
    """Copy a Keras layer and remove its activation.

    Copies a Keras layer but removes a potential activation.

    :param layer: A layer that should be copied.
    :param keep_bias: Keep a potential bias.
    :param name_template: A template for the new layer's name.
    :param weights: Weights to set in the new layer.
      Options: NumPy arrays, symbolic tensors, or None,
      in which case the weights of `layer` are used.
    :param reuse_symbolic_tensors: If the weights of `layer` are used,
      copy the symbolic tensors instead of the NumPy weights.
    :return: The new layer instance.
    """
    config = layer.get_config()
    if name_template is None:
        config["name"] = None
    else:
        config["name"] = name_template % config["name"]
    if kchecks.contains_activation(layer):
        config["activation"] = None
    if hasattr(layer, "use_bias"):
        if keep_bias is False and config.get("use_bias", True):
            config["use_bias"] = False
            if weights is None:
                # Drop the bias, which is the last weight of the layer.
                if reuse_symbolic_tensors:
                    weights = layer.weights[:-1]
                else:
                    weights = layer.get_weights()[:-1]
    return get_layer_from_config(layer, config, weights=weights, **kwargs)
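# Hypothetical example: strip the ReLU off a built Dense layer so that
# its pre-activation output can be reused. Layer and template names are
# illustrative.
dense = keras.layers.Dense(10, activation="relu", name="fc1")
dense.build((None, 4))  # create weights so they can be reused
dense_wo_act = copy_layer_wo_activation(dense, name_template="wo_act_%s")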
def contains_activation(layer):
    # True only for a genuine non-linearity: a linear activation counts
    # as "no activation" here.
    return (kchecks.contains_activation(layer) and
            not kchecks.contains_activation(layer, "linear"))
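# Behaviour sketch implied by the helper's definition (assuming Keras
# Dense layers, which always expose an `activation` attribute):
assert contains_activation(keras.layers.Dense(5, activation="relu"))
assert not contains_activation(keras.layers.Dense(5, activation="linear"))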
def __init__(self, model, allow_lambda_layers=False, **kwargs):
    # Inside the function to not break the import if Keras changes.
    BASELINELRPZ_LAYERS = (
        keras.engine.topology.InputLayer,
        keras.layers.convolutional.Conv1D,
        keras.layers.convolutional.Conv2D,
        keras.layers.convolutional.Conv2DTranspose,
        keras.layers.convolutional.Conv3D,
        keras.layers.convolutional.Conv3DTranspose,
        keras.layers.convolutional.Cropping1D,
        keras.layers.convolutional.Cropping2D,
        keras.layers.convolutional.Cropping3D,
        keras.layers.convolutional.SeparableConv1D,
        keras.layers.convolutional.SeparableConv2D,
        keras.layers.convolutional.UpSampling1D,
        keras.layers.convolutional.UpSampling2D,
        keras.layers.convolutional.UpSampling3D,
        keras.layers.convolutional.ZeroPadding1D,
        keras.layers.convolutional.ZeroPadding2D,
        keras.layers.convolutional.ZeroPadding3D,
        keras.layers.core.Activation,
        keras.layers.core.ActivityRegularization,
        keras.layers.core.Dense,
        keras.layers.core.Dropout,
        keras.layers.core.Flatten,
        keras.layers.core.Lambda,
        keras.layers.core.Masking,
        keras.layers.core.Permute,
        keras.layers.core.RepeatVector,
        keras.layers.core.Reshape,
        keras.layers.core.SpatialDropout1D,
        keras.layers.core.SpatialDropout2D,
        keras.layers.core.SpatialDropout3D,
        keras.layers.local.LocallyConnected1D,
        keras.layers.local.LocallyConnected2D,
        keras.layers.Add,
        keras.layers.Concatenate,
        keras.layers.Dot,
        keras.layers.Maximum,
        keras.layers.Minimum,
        keras.layers.Subtract,
        keras.layers.noise.AlphaDropout,
        keras.layers.noise.GaussianDropout,
        keras.layers.noise.GaussianNoise,
        keras.layers.normalization.BatchNormalization,
        keras.layers.pooling.GlobalMaxPooling1D,
        keras.layers.pooling.GlobalMaxPooling2D,
        keras.layers.pooling.GlobalMaxPooling3D,
        keras.layers.pooling.MaxPooling1D,
        keras.layers.pooling.MaxPooling2D,
        keras.layers.pooling.MaxPooling3D,
    )

    self._model_checks = [
        # TODO: Check for non-linear output in general.
        {
            "check": lambda layer: kchecks.contains_activation(
                layer, activation="softmax"),
            "type": "exception",
            "message": "Model should not contain a softmax.",
        },
        {
            "check": lambda layer: not kchecks.only_relu_activation(layer),
            "type": "exception",
            "message": ("BaselineLRPZ does not work for "
                        "networks with non-ReLU activations."),
        },
        {
            "check": lambda layer: not isinstance(layer, BASELINELRPZ_LAYERS),
            "type": "exception",
            "message": ("BaselineLRPZ is only defined for "
                        "certain layers."),
        },
        {
            "check": lambda layer: (not allow_lambda_layers and
                                    isinstance(layer, keras.layers.core.Lambda)),
            "type": "exception",
            "message": ("Lambda layers are not allowed. "
                        "To allow them, use the allow_lambda_layers keyword."),
        },
    ]

    self._allow_lambda_layers = allow_lambda_layers

    super(BaselineLRPZ, self).__init__(model, **kwargs)
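# Hypothetical usage sketch: the model checks above reject Lambda layers
# unless explicitly permitted via the keyword.
analyzer = BaselineLRPZ(model, allow_lambda_layers=True)
relevance = analyzer.analyze(x)  # x: a batch of inputs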
def __init__(self, model, *args, **kwargs):
    rule = kwargs.pop("rule", None)
    input_layer_rule = kwargs.pop("input_layer_rule", None)

    self._model_checks = [
        # TODO: Check for non-linear output in general.
        {
            "check": lambda layer: kchecks.contains_activation(
                layer, activation="softmax"),
            "type": "exception",
            "message": "Model should not contain a softmax.",
        },
        {
            "check": lambda layer: not kchecks.is_convnet_layer(layer),
            "type": "warning",
            "message": ("LRP is only tested for "
                        "convolutional neural networks."),
        },
    ]

    # Check whether a rule was given explicitly.
    # `rule` can be a string, a list (of strings), or a list of
    # conditions [(Condition, Rule), ...], one per layer.
    if rule is None:
        raise ValueError("Need LRP rule(s).")

    if isinstance(rule, list):
        # Copy the list of references.
        self._rule = list(rule)
    else:
        self._rule = rule
    self._input_layer_rule = input_layer_rule

    if (isinstance(rule, six.string_types) or
            (inspect.isclass(rule) and
             issubclass(rule, kgraph.ReverseMappingBase))):
        # NOTE: All LRP rules inherit from kgraph.ReverseMappingBase.
        # The given rule is a single string or a single rule-implementing
        # class; apply it unconditionally to every layer.
        use_conditions = True
        rules = [(lambda a, b: True, rule)]
    elif not isinstance(rule[0], tuple):
        # `rule` is a list of rule strings or classes.
        use_conditions = False
        rules = list(rule)
    else:
        # `rule` is a list of conditioned rules.
        use_conditions = True
        rules = rule

    # Create a BoundedRule for input layer handling from a given tuple.
    if self._input_layer_rule is not None:
        input_layer_rule = self._input_layer_rule
        if isinstance(input_layer_rule, tuple):
            low, high = input_layer_rule

            class BoundedProxyRule(rrule.BoundedRule):
                def __init__(self, *args, **kwargs):
                    super(BoundedProxyRule, self).__init__(
                        *args, low=low, high=high, **kwargs)

            input_layer_rule = BoundedProxyRule

        if use_conditions is True:
            rules.insert(0, (lambda layer, foo: kchecks.is_input_layer(layer),
                             input_layer_rule))
        else:
            rules.insert(0, input_layer_rule)

    ####################################################################
    ### Functionality responsible for backwards rule selection below ###
    ####################################################################

    def select_rule(layer, reverse_state):
        if use_conditions is True:
            for condition, rule in rules:
                if condition(layer, reverse_state):
                    return rule
            raise Exception("No rule applies to layer: %s" % layer)
        else:
            return rules.pop()

    # Default backward hook.
    class ReverseLayer(kgraph.ReverseMappingBase):
        def __init__(self, layer, state):
            rule_class = select_rule(layer, state)  # NOTE: this prevents refactoring.
            if isinstance(rule_class, six.string_types):
                rule_class = LRP_RULES[rule_class]
            self._rule = rule_class(layer, state)

        def apply(self, Xs, Ys, Rs, reverse_state):
            return self._rule.apply(Xs, Ys, Rs, reverse_state)

    # Specialized backward hooks.
    # TODO: add ReverseLayer classes handling layers without a kernel: Add and AvgPool.
    class BatchNormalizationReverseLayer(kgraph.ReverseMappingBase):
        def __init__(self, layer, state):
            config = layer.get_config()

            self._center = config['center']
            self._scale = config['scale']
            self._axis = config['axis']

            self._mean = layer.moving_mean
            self._std = layer.moving_variance
            if self._center:
                self._beta = layer.beta

            # TODO: implement rule support for BatchNormalization,
            # e.g. [BNEpsilon, BNAlphaBeta, BNIgnore].
            # super(BatchNormalizationReverseLayer, self).__init__(layer, state)
            # How to do this:
            # super.__init__ calls select_rule and sets a self._rule class.
            # Check if isinstance(self._rule, EpsilonRule), then reroute
            # to BatchNormEpsilonRule. Not pretty, but should work.

        def apply(self, Xs, Ys, Rs, reverse_state):
            input_shape = [K.int_shape(x) for x in Xs]
            if len(input_shape) != 1:
                # TODO: extend the lambda layers below towards multiple parameters.
                raise ValueError(
                    "BatchNormalizationReverseLayer expects Xs with "
                    "len(Xs) = 1, but was len(Xs) = {}".format(len(Xs)))
            input_shape = input_shape[0]

            # Prepare broadcasting shape for layer parameters.
            broadcast_shape = [1] * len(input_shape)
            broadcast_shape[self._axis] = input_shape[self._axis]
            broadcast_shape[0] = -1

            # Reweight relevances as
            #        x * (y - beta)     R
            # Rin = ---------------- * ---
            #            x - mu         y
            # Batch norm can be considered as three distinct layers of
            # subtraction, multiplication and then addition. The
            # multiplicative scaling layer has no effect on LRP and
            # functions as a linear activation layer.
            minus_mu = keras.layers.Lambda(
                lambda x: x - K.reshape(self._mean, broadcast_shape))
            minus_beta = keras.layers.Lambda(
                lambda x: x - K.reshape(self._beta, broadcast_shape))
            prepare_div = keras.layers.Lambda(
                lambda x: x + (K.cast(K.greater_equal(x, 0), K.floatx())
                               * 2 - 1) * K.epsilon())

            x_minus_mu = kutils.apply(minus_mu, Xs)
            if self._center:
                y_minus_beta = kutils.apply(minus_beta, Ys)
            else:
                y_minus_beta = Ys

            numerator = [keras.layers.Multiply()([x, ymb, r])
                         for x, ymb, r in zip(Xs, y_minus_beta, Rs)]
            denominator = [keras.layers.Multiply()([xmm, y])
                           for xmm, y in zip(x_minus_mu, Ys)]

            return [ilayers.SafeDivide()([n, prepare_div(d)])
                    for n, d in zip(numerator, denominator)]

    class AddReverseLayer(kgraph.ReverseMappingBase):
        def __init__(self, layer, state):
            self._layer_wo_act = kgraph.copy_layer_wo_activation(
                layer, name_template="reversed_kernel_%s")

            # TODO: implement rule support.
            # super(AddReverseLayer, self).__init__(layer, state)

        def apply(self, Xs, Ys, Rs, reverse_state):
            # The output of the add operation at each location is the sum
            # of its inputs. The gradient is 1 for each output-to-input
            # connection, which corresponds to the "weights" of the layer.
            # It is thus sufficient to reweight the relevances and do a
            # gradient_wrt pass.
            grad = ilayers.GradientWRT(len(Xs))
            # Get activations.
            Zs = kutils.apply(self._layer_wo_act, Xs)
            # Divide incoming relevance by the activations.
            tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]

            # Propagate the relevance to the input neurons
            # using the gradient.
            tmp = iutils.to_list(grad(Xs + Zs + tmp))
            # Re-weight relevance with the input values.
            return [keras.layers.Multiply()([a, b])
                    for a, b in zip(Xs, tmp)]

    class AveragePoolingReverseLayer(kgraph.ReverseMappingBase):
        def __init__(self, layer, state):
            self._layer_wo_act = kgraph.copy_layer_wo_activation(
                layer, name_template="reversed_kernel_%s")

            # TODO: implement rule support.
            # super(AveragePoolingReverseLayer, self).__init__(layer, state)

        def apply(self, Xs, Ys, Rs, reverse_state):
            # The output of the pooling operation at each location is a
            # (scaled) sum of its inputs; the forward inputs to each
            # pooling region must therefore be known. The gradient is
            # constant for each output-to-input connection, which
            # corresponds to the "weights" of the layer. It is thus
            # sufficient to reweight the relevances and do a gradient_wrt
            # pass.
            grad = ilayers.GradientWRT(len(Xs))
            # Get activations.
            Zs = kutils.apply(self._layer_wo_act, Xs)
            # Divide incoming relevance by the activations.
            tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
            # Propagate the relevance to the input neurons
            # using the gradient.
            tmp = iutils.to_list(grad(Xs + Zs + tmp))
            # Re-weight relevance with the input values.
            return [keras.layers.Multiply()([a, b])
                    for a, b in zip(Xs, tmp)]

    # Conditional mappings layer_criterion -> ReverseLayer,
    # i.e. how to handle the backward pass through each layer type.
    self._conditional_mappings = [
        (kchecks.contains_kernel, ReverseLayer),
        (kchecks.is_batch_normalization_layer, BatchNormalizationReverseLayer),
        (kchecks.is_average_pooling, AveragePoolingReverseLayer),
        (kchecks.is_add_layer, AddReverseLayer),
    ]

    # FINALIZED constructor.
    super(LRP, self).__init__(model, *args, **kwargs)
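# Sketches of the three accepted `rule` formats (illustrative
# assumptions; the rule-name strings follow the LRP_RULES registry
# referenced above and are examples, not an exhaustive list):
LRP(model, rule="Epsilon")                    # one rule for every layer
LRP(model, rule=["Epsilon", "Alpha2Beta1"])   # plain list, consumed via rules.pop()
LRP(model, rule=[                             # (condition, rule) pairs
    (kchecks.is_dense_layer, "Epsilon"),
    (kchecks.is_conv_layer, "Alpha2Beta1"),
])
# The input layer can additionally be bounded, e.g.
# LRP(model, rule="Epsilon", input_layer_rule=(-1.0, 1.0)).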
def _create_analysis(self, *args: Any, **kwargs: Any):
    # Kernel layers.
    self._add_conditional_reverse_mapping(
        lambda l: (kchecks.contains_kernel(l) and
                   kchecks.contains_activation(l)),
        lrp_rules.Alpha1Beta0IgnoreBiasRule,
        name="deep_taylor_kernel_w_relu",
    )
    self._add_conditional_reverse_mapping(
        lambda l: (kchecks.contains_kernel(l) and
                   not kchecks.contains_activation(l)),
        lrp_rules.WSquareRule,
        name="deep_taylor_kernel_wo_relu",
    )

    # ReLU activation layer.
    self._add_conditional_reverse_mapping(
        lambda l: (not kchecks.contains_kernel(l) and
                   kchecks.only_relu_activation(l)),
        self._gradient_reverse_mapping,
        name="deep_taylor_relu",
    )

    # Assume conv layer beforehand -> unbounded.
    bn_mapping = kgraph.apply_mapping_to_fused_bn_layer(
        lrp_rules.WSquareRule,
        fuse_mode="one_linear",
    )
    self._add_conditional_reverse_mapping(
        kchecks.is_batch_normalization_layer,
        bn_mapping,
        name="deep_taylor_batch_norm",
    )

    # Special layers.
    self._add_conditional_reverse_mapping(
        kchecks.is_max_pooling,
        self._gradient_reverse_mapping,
        name="deep_taylor_max_pooling",
    )
    self._add_conditional_reverse_mapping(
        kchecks.is_average_pooling,
        self._gradient_reverse_mapping,
        name="deep_taylor_average_pooling",
    )
    self._add_conditional_reverse_mapping(
        lambda l: isinstance(l, keras.layers.Add),
        # Ignore scaling with 0.5.
        self._gradient_reverse_mapping,
        name="deep_taylor_add",
    )
    self._add_conditional_reverse_mapping(
        lambda l: isinstance(
            l,
            (
                keras.layers.convolutional.UpSampling1D,
                keras.layers.convolutional.UpSampling2D,
                keras.layers.convolutional.UpSampling3D,
                keras.layers.core.Dropout,
                keras.layers.core.SpatialDropout1D,
                keras.layers.core.SpatialDropout2D,
                keras.layers.core.SpatialDropout3D,
            ),
        ),
        self._gradient_reverse_mapping,
        name="deep_taylor_special_layers",
    )

    # Layers w/o transformation.
    self._add_conditional_reverse_mapping(
        lambda l: isinstance(
            l,
            (
                keras.engine.topology.InputLayer,
                keras.layers.convolutional.Cropping1D,
                keras.layers.convolutional.Cropping2D,
                keras.layers.convolutional.Cropping3D,
                keras.layers.convolutional.ZeroPadding1D,
                keras.layers.convolutional.ZeroPadding2D,
                keras.layers.convolutional.ZeroPadding3D,
                keras.layers.Concatenate,
                keras.layers.core.Flatten,
                keras.layers.core.Masking,
                keras.layers.core.Permute,
                keras.layers.core.RepeatVector,
                keras.layers.core.Reshape,
            ),
        ),
        self._gradient_reverse_mapping,
        name="deep_taylor_no_transform",
    )

    return super()._create_analysis(*args, **kwargs)
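# Plausible usage sketch (an assumption, not part of the source above):
# DeepTaylor expects a ReLU network whose softmax has been removed,
# e.g. with innvestigate.model_wo_softmax(model).
analyzer = DeepTaylor(model_wo_softmax)
heatmap = analyzer.analyze(x)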
def test_contains_activation_general(activation, expected):
    layer = keras.layers.Dense(5, activation=activation)
    assert ichecks.contains_activation(layer) == expected
def test_contains_activation_softmax(activation, expected):
    layer = keras.layers.Dense(5, activation=activation)
    assert ichecks.contains_activation(layer, "softmax") == expected
def test_contains_activation_elu(activation, layer, expected):
    assert ichecks.contains_activation(layer, "elu") == expected
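# The tests above are presumably driven by pytest.mark.parametrize; a
# plausible (assumed) parametrization for the softmax variant:
import pytest

@pytest.mark.parametrize(
    "activation,expected",
    [("softmax", True), ("relu", False), ("linear", False)],
)
def test_contains_activation_softmax_example(activation, expected):
    layer = keras.layers.Dense(5, activation=activation)
    assert ichecks.contains_activation(layer, "softmax") == expected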