Example #1
    def test_qoibprop_identity(self):
        self.assertTrue(
            np.allclose(
                self.model.qoi_bprop(
                    MaxClassQoI(), (np.array([[2., 1.], [1., 2.]]),),
                    attribution_cut=InputCut(),
                    to_cut=InputCut()), np.array([[1., 0.], [0., 1.]])))
Example #2
    def test_multiple_inputs(self):

        class M(Module):

            def __init__(this):
                super(M, this).__init__()
                this.z1 = Linear(5, 6)
                this.z3 = Linear(7, 7)
                this.y = Linear(7, 3)

            def forward(this, x1, x2):
                x1 = this.z1(x1)
                z = cat((x1, x2), 1)
                z = this.z3(z)
                return this.y(z)

        model = ModelWrapper(M(), [(5,), (1,)])

        infl = InternalInfluence(model, InputCut(), ClassQoI(1), PointDoi())

        res = infl.attributions(
            np.array([[1., 2., 3., 4., 5.]]).astype('float32'),
            np.array([[1.]]).astype('float32'))

        self.assertEqual(len(res), 2)
        self.assertEqual(res[0].shape, (1, 5))
        self.assertEqual(res[1].shape, (1, 1))
Example #3
    def test_batch_processing_deep(self):
        infl = InternalInfluence(self.model_deep, InputCut(), MaxClassQoI(),
                                 LinearDoi())

        r1 = np.concatenate([infl.attributions(x[None]) for x in self.batch_x])
        r2 = infl.attributions(self.batch_x)

        self.assertTrue(np.allclose(r1, r2))

    def test_linear_agreement_multiply_activation(self):
        c = 1
        infl = InternalInfluence(
            self.model_lin,
            InputCut(),
            ClassQoI(c),
            PointDoi(),
            multiply_activation=True)

        res = infl.attributions(self.x)

        self.assertEqual(res.shape, (2, self.input_size))

        self.assertTrue(np.allclose(res, self.model_lin_weights[:, c] * self.x))
    def test_distributional_linearity(self):
        x1, x2 = self.x[0:1], self.x[1:]
        p1, p2 = 0.25, 0.75

        class DistLinDoI(DoI):
            '''
            Represents the distribution of interest that weights `z` with
            probability 1/4 and `z + diff` with probability 3/4.
            '''

            def __init__(self, diff):
                super(DistLinDoI, self).__init__()
                self.diff = diff

            def __call__(self, z):
                return [z, z + self.diff, z + self.diff, z + self.diff]

        infl_pt = InternalInfluence(
            self.model_deep,
            InputCut(),
            ClassQoI(0),
            PointDoi(),
            multiply_activation=False)

        attr1 = infl_pt.attributions(x1)
        attr2 = infl_pt.attributions(x2)

        infl_dl = InternalInfluence(
            self.model_deep,
            InputCut(),
            ClassQoI(0),
            DistLinDoI(x2 - x1),
            multiply_activation=False)

        attr12 = infl_dl.attributions(x1)

        self.assertTrue(np.allclose(attr12, p1 * attr1 + p2 * attr2))
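
The 1/4 and 3/4 weights come purely from sample counts: `DistLinDoI` returns one copy of `z` and three copies of `z + diff`, and attributions are averaged over the DoI samples. A plain-NumPy arithmetic sketch of that expectation:

    import numpy as np

    g = lambda z: z ** 2  # stand-in for a per-sample gradient value
    z, diff = 1.0, 2.0
    samples = [g(z), g(z + diff), g(z + diff), g(z + diff)]
    # Mean over 4 samples = 1/4 * g(z) + 3/4 * g(z + diff).
    assert np.isclose(np.mean(samples), 0.25 * g(z) + 0.75 * g(z + diff))
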
    def test_idempotence(self):
        infl = InternalInfluence(
            self.model_lin,
            InputCut(),
            MaxClassQoI(),
            PointDoi(),
            multiply_activation=False)

        res1 = infl.attributions(self.x)
        res2 = infl.attributions(self.x)

        self.assertTrue(np.allclose(res1, res2))

        infl_act = InternalInfluence(
            self.model_lin,
            InputCut(),
            MaxClassQoI(),
            PointDoi(),
            multiply_activation=True)

        res1 = infl_act.attributions(self.x)
        res2 = infl_act.attributions(self.x)

        self.assertTrue(np.allclose(res1, res2))

    def test_completeness_zero_baseline(self):
        c = 2
        infl = InternalInfluence(
            self.model_deep,
            InputCut(),
            ClassQoI(c),
            LinearDoi(resolution=100),
            multiply_activation=True)

        out_x = self.model_deep.fprop((self.x,))[0][:, c]
        out_baseline = self.model_deep.fprop((self.baseline * 0,))[0][:, c]

        res = infl.attributions(self.x)

        self.assertTrue(
            np.allclose(res.sum(axis=1), out_x - out_baseline, atol=5e-2))

    def test_sensitivity(self):
        c = 2
        infl = InternalInfluence(
            self.model_deep,
            InputCut(),
            ClassQoI(c),
            LinearDoi(self.baseline),
            multiply_activation=False)

        out_x = self.model_deep.fprop((self.x[0:1],))[0][:, c]
        out_baseline = self.model_deep.fprop((self.baseline,))[0][:, c]

        if not np.allclose(out_x, out_baseline):
            res = infl.attributions(self.x)

            self.assertEqual(res.shape, (2, self.input_size))

            self.assertNotEqual(res[0, 3], 0.)

    def test_multiple_inputs(self):
        x1 = Input((5, ))
        z1 = Dense(6)(x1)
        x2 = Input((1, ))
        z2 = Concatenate()([z1, x2])
        z3 = Dense(7)(z2)
        y = Dense(3)(z3)

        model = ModelWrapper(Model([x1, x2], y))

        infl = InternalInfluence(model, InputCut(), ClassQoI(1), PointDoi())

        res = infl.attributions(
            [np.array([[1., 2., 3., 4., 5.]]),
             np.array([[1.]])])

        self.assertEqual(len(res), 2)
        self.assertEqual(res[0].shape, (1, 5))
        self.assertEqual(res[1].shape, (1, 1))
Example #10
    def __get_cut(cut_arg):
        """
        Helper function to get a `Cut` object from more user-friendly primitive
        arguments.
        """
        if isinstance(cut_arg, Cut):
            # We are already given a Cut, so return it.
            return cut_arg

        elif cut_arg is None or cut_arg == 0:
            # If we receive None or zero, we take it to be the input cut.
            return InputCut()

        # TODO(klas): may want a bit more validation here.
        elif isinstance(cut_arg, int) or isinstance(cut_arg, str):
            return Cut(cut_arg)

        else:
            raise ValueError('Unrecognized argument type for cut')
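
As a rough sketch of the coercion rules (illustrative only: `__get_cut` is a private helper, so the standalone `get_cut` below just mirrors its branches, assuming `Cut` and `InputCut` from the library's slices module):

    # Illustrative stand-in that mirrors the branches of __get_cut above.
    def get_cut(arg):
        if isinstance(arg, Cut):
            return arg
        elif arg is None or arg == 0:
            return InputCut()
        elif isinstance(arg, (int, str)):
            return Cut(arg)
        raise ValueError('Unrecognized argument type for cut')

    assert isinstance(get_cut(None), InputCut)   # None -> input cut
    assert isinstance(get_cut(0), InputCut)      # 0 -> input cut
    assert isinstance(get_cut(3), Cut)           # layer index -> Cut
    assert isinstance(get_cut('logits'), Cut)    # layer name -> Cut
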
    def test_multiple_inputs(self):
        graph = Graph()

        with graph.as_default():
            x1 = tf.placeholder('float32', (None, 5))
            z1 = x1 @ tf.random.normal((5, 6))
            x2 = tf.placeholder('float32', (None, 1))
            z2 = tf.concat([z1, x2], axis=1)
            z3 = z2 @ tf.random.normal((7, 7))
            y = z3 @ tf.random.normal((7, 3))

        model = ModelWrapper(graph, [x1, x2], y)

        infl = InternalInfluence(model, InputCut(), ClassQoI(1), PointDoi())

        res = infl.attributions(
            [np.array([[1., 2., 3., 4., 5.]]),
             np.array([[1.]])])

        self.assertEqual(len(res), 2)
        self.assertEqual(res[0].shape, (1, 5))
        self.assertEqual(res[1].shape, (1, 1))
Example #12
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        Runs the model from `doi_cut` to `to_cut` and returns the gradients of
        the QoI w.r.t. `attribution_cut`.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        qoi: a Quantity of Interest
            This method will accumulate all gradients of the QoI w.r.t.
            `attribution_cut`.
        doi_cut: Cut, optional
            If `doi_cut` is None, this refers to the InputCut. The Cut from
            which to begin propagation. The shape of `intervention` must match
            the output shape of this layer.
        attribution_cut: Cut, optional
            If `attribution_cut` is None, this refers to the InputCut. The Cut
            at which attributions will be calculated. This is generally taken
            from the attribution slice's `attribution_cut`.
        to_cut: Cut, optional
            If `to_cut` is None, this refers to the OutputCut. The Cut at which
            the QoI will be calculated. This is generally taken from the
            attribution slice's `to_cut`.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`, keeping the same
            type as the input.
        """

        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        doi_cut = doi_cut if doi_cut else InputCut()

        attribution_tensors = self._get_layers(attribution_cut)
        to_tensors = self._get_layers(to_cut)
        doi_tensors = self._get_layers(doi_cut)
        if intervention is None:
            intervention = model_args

        intervention = intervention if isinstance(
            intervention, DATA_CONTAINER_TYPE) else [intervention]

        Q = qoi(to_tensors[0]) if len(to_tensors) == 1 else qoi(to_tensors)

        gradients = [
            keras.backend.function(
                doi_tensors, B.gradient(q, attribution_tensors))(intervention)
            for q in Q
        ] if isinstance(Q, DATA_CONTAINER_TYPE) else keras.backend.function(
            doi_tensors, B.gradient(Q, attribution_tensors))(intervention)

        return gradients[0] if len(gradients) == 1 else gradients
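
A hedged usage sketch, not a verbatim API transcript: `wrapper` stands for a wrapped Keras model with a 5-dimensional input, and the QoI and cut classes are the ones used throughout these examples.

    x = np.random.uniform(size=(8, 5)).astype('float32')
    grads = wrapper.qoi_bprop(
        MaxClassQoI(), (x,),
        doi_cut=InputCut(),          # start propagation at the input
        attribution_cut=InputCut(),  # gradients taken w.r.t. the input
        to_cut=OutputCut())          # QoI computed at the model output
    # `grads` has one gradient row per instance in `x`.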
Example #13
    def __init__(self,
                 model,
                 layer,
                 channel,
                 channel_axis=B.channel_axis,
                 agg_fn=None,
                 doi=None,
                 blur=None,
                 threshold=0.5,
                 masked_opacity=0.2,
                 combine_channels=True,
                 use_attr_as_opacity=None,
                 positive_only=None):
        """
        Configures the default parameters for the `__call__` method (these can 
        be overridden by passing in values to `__call__`).

        Parameters:
            model:
                The wrapped model whose channel we're visualizing.

            layer:
                The identifier (either index or name) of the layer in which the 
                channel we're visualizing resides.

            channel:
                Index of the channel (for convolutional layers) or internal 
                neuron (for fully-connected layers) that we'd like to visualize.

            channel_axis:
                If different from the channel axis specified by the backend, the
                supplied `channel_axis` will be used if operating on a 
                convolutional layer with 4-D image format.

            agg_fn:
                Function with which to aggregate the remaining dimensions 
                (except the batch dimension) in order to get a single scalar 
                value for each channel; If `None`, a sum over each neuron in the
                channel will be taken. This argument is not used when the 
                channels are scalars, e.g., for dense layers.

            doi:
                The distribution of interest to use when computing the input
                attributions towards the specified channel. If `None`, 
                `PointDoI` will be used.

            blur:
                Gives the radius of a Gaussian blur to be applied to the 
                attributions before visualizing. This can be used to help focus
                on salient regions rather than specific salient pixels.

            threshold:
            Value in the range [0, 1]. Attribution values at or below the
                percentile given by `threshold` (after normalization, blurring,
                etc.) will be masked.

            masked_opacity: 
                Value in the range [0, 1] specifying the opacity for the parts
                of the image that are masked.

            combine_channels:
                If `True`, the attributions will be averaged across the channel
                dimension, resulting in a 1-channel attribution map.

            use_attr_as_opacity:
                If `True`, instead of using `threshold` and `masked_opacity`,
                the opacity of each pixel is given by the 0-1-normalized 
                attribution value.

            positive_only:
                If `True`, only pixels with positive attribution will be 
                unmasked (or given nonzero opacity when `use_attr_as_opacity` is
                true).
        """

        self.mask_visualizer = MaskVisualizer(blur, threshold, masked_opacity,
                                              combine_channels,
                                              use_attr_as_opacity,
                                              positive_only)

        self.infl_input = InternalInfluence(
            model, (InputCut(), Cut(layer)),
            InternalChannelQoI(channel, channel_axis, agg_fn),
            PointDoi() if doi is None else doi)
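
A construction sketch under stated assumptions: `ChannelMaskVisualizer` is assumed to be the class this `__init__` belongs to, `wrapped_model` is a placeholder for a wrapped convolutional model, and `'conv2'` is a placeholder layer name.

    viz = ChannelMaskVisualizer(  # assumed class name for this __init__
        wrapped_model,
        layer='conv2',     # layer containing the channel to visualize
        channel=7,         # channel index within that layer
        blur=2.0,          # Gaussian blur radius over the attributions
        threshold=0.8)     # mask attributions at or below the 80th percentile
    # Calling `viz(x)` would then produce masked visualizations for batch `x`.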
Example #14
    def __init__(
            self,
            model: ModelWrapper,
            cut: CutLike = None,
            qoi: QoiLike = 'max',
            doi: DoiLike = 'point',
            multiply_activation: bool = True):
        """
        Parameters:
            model :
                Model for which attributions are calculated.

            cut :
                The cut determining the layer from which the QoI is derived.
                Expects a `Cut` object, or a related type that can be 
                interpreted as a `Cut`, as documented below.

                If an `int` is given, it represents the index of a layer in 
                `model`. 

                If a `str` is given, it represents the name of a layer in 
                `model`. 
                
                `None` is an alternative for `slices.OutputCut()`.

            qoi : quantities.QoI | int | tuple | Callable | str
                Quantity of interest to attribute. Expects a `QoI` object, or a
                related type that can be interpreted as a `QoI`, as documented
                below.

                If an `int` is given, the quantity of interest is taken to be 
                the slice output for the class/neuron/channel specified by the 
                given integer, i.e., 
                ```python
                quantities.InternalChannelQoI(qoi)
                ```

                If a tuple or list of two integers is given, then the quantity 
                of interest is taken to be the comparative quantity for the 
                class given by the first integer against the class given by the 
                second integer, i.e., 
                ```python
                quantities.ComparativeQoI(*qoi)
                ```

                If a callable is given, it is interpreted as a function
                representing the QoI, i.e.,
                ```python
                quantities.LambdaQoI(qoi)
                ```

                If the string, `'max'`, is given, the quantity of interest is 
                taken to be the output for the class with the maximum score, 
                i.e., 
                ```python
                quantities.MaxClassQoI()
                ```

            doi : distributions.DoI | str
                Distribution of interest over inputs. Expects a `DoI` object, or
                a related type that can be interpreted as a `DoI`, as documented
                below.

                If the string, `'point'`, is given, the distribution is taken to
                be the single point passed to `attributions`, i.e., 
                ```python
                distributions.PointDoi()
                ```

                If the string, `'linear'`, is given, the distribution is taken 
                to be the linear interpolation from the zero input to the point 
                passed to `attributions`, i.e., 
                ```python
                distributions.LinearDoi()
                ```

            multiply_activation : bool, optional
                Whether to multiply the gradient result by its corresponding
                activation, thus converting from "*influence space*" to 
                "*attribution space*."
        """
        super().__init__(
            model, (InputCut(), cut),
            qoi,
            doi,
            multiply_activation=multiply_activation)
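
A sketch of the shorthand forms described above, assuming this constructor belongs to an `InputAttribution`-style class (the class name and the `quantities`/`distributions` module aliases are illustrative):

    # Long and short forms are interpreted equivalently:
    attr = InputAttribution(model)  # cut=None, qoi='max', doi='point'
    attr = InputAttribution(
        model,
        cut=None,                       # None -> slices.OutputCut()
        qoi=quantities.MaxClassQoI(),   # 'max' -> max-class QoI
        doi=distributions.PointDoi())   # 'point' -> point distribution

    # Tuple shorthand for a comparative QoI between classes 2 and 0:
    attr_cmp = InputAttribution(model, qoi=(2, 0))  # ComparativeQoI(2, 0)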
Example #15
    def attributions(self, *model_args, **model_kwargs):
        doi_cut = self.doi.cut() if self.doi.cut() else InputCut()

        doi_val = self.model.fprop(model_args, model_kwargs, to_cut=doi_cut)

        # The DoI supports a tensor or a list of tensors; unwrap the args so
        # that the DoI operates on the top-level list.

        # Depending on the model_args input, the data may be nested in data
        # containers. We unwrap so that the operations work on a single level
        # of data container.
        if isinstance(doi_val, DATA_CONTAINER_TYPE) and isinstance(
                doi_val[0], DATA_CONTAINER_TYPE):
            doi_val = doi_val[0]

        if isinstance(doi_val, DATA_CONTAINER_TYPE) and len(doi_val) == 1:
            doi_val = doi_val[0]

        D = self.doi(doi_val)
        n_doi = len(D)
        D = InternalInfluence.__concatenate_doi(D)

        # Calculate the gradient of each of the points in the DoI.
        qoi_grads = self.model.qoi_bprop(
            self.qoi,
            model_args,
            model_kwargs,
            attribution_cut=self.slice.from_cut,
            to_cut=self.slice.to_cut,
            intervention=D,
            doi_cut=doi_cut)
        # Take the mean across the samples in the DoI.
        if isinstance(qoi_grads, DATA_CONTAINER_TYPE):
            attributions = [
                B.mean(
                    B.reshape(qoi_grad, (n_doi, -1) + qoi_grad.shape[1:]),
                    axis=0) for qoi_grad in qoi_grads
            ]
        else:
            attributions = B.mean(
                B.reshape(qoi_grads, (n_doi, -1) + qoi_grads.shape[1:]), axis=0)

        # Multiply by the activation multiplier if specified.
        if self._do_multiply:
            z_val = self.model.fprop(
                model_args, model_kwargs, to_cut=self.slice.from_cut)
            if isinstance(z_val, DATA_CONTAINER_TYPE) and len(z_val) == 1:
                z_val = z_val[0]

            if isinstance(attributions, DATA_CONTAINER_TYPE):
                for i in range(len(attributions)):
                    if isinstance(z_val, DATA_CONTAINER_TYPE) and len(
                            z_val) == len(attributions):
                        attributions[i] *= self.doi.get_activation_multiplier(
                            z_val[i])
                    else:
                        attributions[i] *= (
                            self.doi.get_activation_multiplier(z_val))

            else:
                attributions *= self.doi.get_activation_multiplier(z_val)

        return attributions
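
The reshape-and-mean step above averages the `n_doi` DoI samples stacked along the batch axis; a minimal plain-NumPy sketch of that reduction:

    import numpy as np

    n_doi, batch, features = 3, 2, 4
    # Gradients arrive with the DoI samples stacked along the batch axis:
    qoi_grads = np.arange(n_doi * batch * features, dtype=float)
    qoi_grads = qoi_grads.reshape(n_doi * batch, features)
    # Split off the DoI axis and average over it, mirroring the code above:
    attributions = qoi_grads.reshape(
        (n_doi, -1) + qoi_grads.shape[1:]).mean(axis=0)
    assert attributions.shape == (batch, features)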
Example #16
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        Runs the model from `doi_cut` to `to_cut` and returns the gradients of
        the QoI w.r.t. `attribution_cut`.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        qoi: a Quantity of Interest
            This method will accumulate all gradients of the QoI w.r.t.
            `attribution_cut`.
        doi_cut: Cut, optional
            If `doi_cut` is None, this refers to the InputCut. The Cut from
            which to begin propagation. The shape of `intervention` must match
            the output shape of this layer.
        attribution_cut: Cut, optional
            If `attribution_cut` is None, this refers to the InputCut. The Cut
            at which attributions will be calculated. This is generally taken
            from the attribution slice's `attribution_cut`.
        to_cut: Cut, optional
            If `to_cut` is None, this refers to the OutputCut. The Cut at which
            the QoI will be calculated. This is generally taken from the
            attribution slice's `to_cut`.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`, keeping the same
            type as the input.
        """
        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        y, zs = self.fprop(model_args,
                           model_kwargs,
                           doi_cut=doi_cut if doi_cut else InputCut(),
                           to_cut=to_cut,
                           attribution_cut=attribution_cut,
                           intervention=intervention,
                           return_tensor=True)

        y = to_cut.access_layer(y)
        grads_list = []
        for z in zs:
            z_flat = ModelWrapper._flatten(z)
            qoi_out = qoi(y)

            grads_flat = [B.gradient(B.sum(q), z_flat)
                          for q in qoi_out] if isinstance(
                              qoi_out, DATA_CONTAINER_TYPE) else B.gradient(
                                  B.sum(qoi_out), z_flat)

            grads = [
                ModelWrapper._unflatten(g, z, count=[0]) for g in grads_flat
            ] if isinstance(qoi_out,
                            DATA_CONTAINER_TYPE) else ModelWrapper._unflatten(
                                grads_flat, z, count=[0])

            grads = [
                attribution_cut.access_layer(g) for g in grads
            ] if isinstance(
                qoi_out,
                DATA_CONTAINER_TYPE) else attribution_cut.access_layer(grads)

            grads = [B.as_array(g) for g in grads] if isinstance(
                qoi_out, DATA_CONTAINER_TYPE) else B.as_array(grads)

            grads_list.append(grads)

        del y  # TODO: garbage collection

        return grads_list[0] if len(grads_list) == 1 else grads_list
Example #17
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        Runs the model from `doi_cut` to `to_cut` and returns the gradients of
        the QoI w.r.t. `attribution_cut`.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        qoi: a Quantity of Interest
            This method will accumulate all gradients of the QoI w.r.t.
            `attribution_cut`.
        doi_cut: Cut, optional
            If `doi_cut` is None, this refers to the InputCut. The Cut from
            which to begin propagation. The shape of `intervention` must match
            the output shape of this layer.
        attribution_cut: Cut, optional
            If `attribution_cut` is None, this refers to the InputCut. The Cut
            at which attributions will be calculated. This is generally taken
            from the attribution slice's `attribution_cut`.
        to_cut: Cut, optional
            If `to_cut` is None, this refers to the OutputCut. The Cut at which
            the QoI will be calculated. This is generally taken from the
            attribution slice's `to_cut`.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`, keeping the same
            type as the input.
        """
        if intervention is None:
            intervention = model_args

        if not self._eager:
            return super().qoi_bprop(qoi, model_args, model_kwargs, doi_cut,
                                     to_cut, attribution_cut, intervention)

        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        return_numpy = True

        with tf.GradientTape(persistent=True) as tape:

            intervention = intervention if isinstance(
                intervention, DATA_CONTAINER_TYPE) else [intervention]
            # We return a numpy array if we were given a numpy array; otherwise
            # we will let the returned values remain data tensors.
            return_numpy = isinstance(intervention, np.ndarray) or isinstance(
                intervention[0], np.ndarray)

            # Convert `intervention` to a data tensor if it isn't already.

            if return_numpy:
                intervention = [
                    ModelWrapper._nested_apply(x_i, tf.constant)
                    for x_i in intervention
                ]

            for x_i in intervention:
                ModelWrapper._nested_apply(x_i, tape.watch)

            outputs, attribution_features = self.fprop(
                model_args,
                model_kwargs,
                doi_cut=doi_cut if doi_cut else InputCut(),
                to_cut=to_cut,
                attribution_cut=attribution_cut,
                intervention=intervention)
            if isinstance(outputs, DATA_CONTAINER_TYPE) and isinstance(
                    outputs[0], DATA_CONTAINER_TYPE):
                outputs = outputs[0]

            Q = qoi(outputs[0]) if len(outputs) == 1 else qoi(outputs)
            if isinstance(Q, DATA_CONTAINER_TYPE) and len(Q) == 1:
                Q = B.sum(Q)

        grads = [tape.gradient(q, attribution_features) for q in Q
                 ] if isinstance(Q, DATA_CONTAINER_TYPE) else tape.gradient(
                     Q, attribution_features)

        grads = grads[0] if isinstance(
            grads, DATA_CONTAINER_TYPE) and len(grads) == 1 else grads

        grads = [attribution_cut.access_layer(g) for g in grads] if isinstance(
            grads,
            DATA_CONTAINER_TYPE) else attribution_cut.access_layer(grads)

        del tape

        if return_numpy:
            grads = [ModelWrapper._nested_apply(g, B.as_array)
                     for g in grads] if isinstance(
                         grads,
                         DATA_CONTAINER_TYPE) else ModelWrapper._nested_apply(
                             grads, B.as_array)

        return grads[0] if isinstance(
            grads, DATA_CONTAINER_TYPE) and len(grads) == 1 else grads

    def fprop(self,
              model_args,
              model_kwargs={},
              doi_cut=None,
              to_cut=None,
              attribution_cut=None,
              intervention=None):
        """
        Forward-propagates the model.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        doi_cut: Cut, optional
            The Cut from which to begin propagation. The shape of
            `intervention` must match the input shape of this layer. This is
            usually used to apply distributions of interest (DoI).
        to_cut: Cut, optional
            The Cut to return output activation tensors for. If `None`, the
            final layer is assumed. Defaults to None.
        attribution_cut: Cut, optional
            A Cut to return activation tensors for. If `None`, the attribution
            layer's output is not returned.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model. The
            intervention may also be a `feed_dict`.

        Returns
        -------
        (list of backend.Tensor or np.ndarray)
            A list of output activations, keeping the same type as the input.
            If `attribution_cut` is supplied, the cut activations are also
            returned.
        """

        if doi_cut is None:
            doi_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        doi_tensors = self._get_layers(doi_cut)
        to_tensors = self._get_layers(to_cut)

        feed_dict, intervention = self._prepare_feed_dict_with_intervention(
            model_args, model_kwargs, intervention, doi_tensors)

        # TensorFlow doesn't allow you to make a function that returns the same
        # tensor as it takes in. Thus, we have to have a special case for the
        # identity function. Any tensors that are both in `doi_tensors` and
        # `to_tensors` cannot be computed via a `keras.backend.function` and
        # thus need to be taken from the input, `x`.
        identity_map = {
            i: j
            for i, to_tensor in enumerate(to_tensors)
            for j, from_tensor in enumerate(doi_tensors)
            if to_tensor == from_tensor
        }

        non_identity_to_tensors = [
            to_tensor for i, to_tensor in enumerate(to_tensors)
            if i not in identity_map
        ]

        # Compute the output values of `to_tensors` unless all `to_tensor`s were
        # also `doi_tensors`.
        if non_identity_to_tensors:
            out_vals = self._run_session(non_identity_to_tensors, feed_dict)

        else:
            out_vals = []

        # For any `to_tensor`s that were also `from_tensor`s, insert the
        # corresponding concrete input value from `x` in the output's place.
        for i in sorted(identity_map):
            out_vals.insert(i, intervention[identity_map[i]])

        return out_vals
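
The identity-map bookkeeping above can be illustrated without TensorFlow; a toy sketch with plain Python lists of how a requested tensor that is also a fed tensor gets spliced back from the input values:

    # Toy stand-ins: 'x2' is both fed (doi) and requested (to).
    doi_tensors = ['x1', 'x2']
    to_tensors = ['x2', 'y']
    intervention = [[1.0], [2.0]]            # values fed for doi_tensors

    identity_map = {
        i: j
        for i, to_t in enumerate(to_tensors)
        for j, from_t in enumerate(doi_tensors)
        if to_t == from_t
    }                                        # -> {0: 1}

    out_vals = ['session-result-for-y']      # only non-identity tensors run
    for i in sorted(identity_map):
        out_vals.insert(i, intervention[identity_map[i]])
    assert out_vals == [[2.0], 'session-result-for-y']
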
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        Runs the model from `doi_cut` to `to_cut` and returns the gradients of
        the QoI w.r.t. `attribution_cut`.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        qoi: a Quantity of Interest
            This method will accumulate all gradients of the QoI w.r.t.
            `attribution_cut`.
        doi_cut: Cut, optional
            If `doi_cut` is None, this refers to the InputCut. The Cut from
            which to begin propagation. The shape of `intervention` must match
            the output shape of this layer.
        attribution_cut: Cut, optional
            If `attribution_cut` is None, this refers to the InputCut. The Cut
            at which attributions will be calculated. This is generally taken
            from the attribution slice's `attribution_cut`.
        to_cut: Cut, optional
            If `to_cut` is None, this refers to the OutputCut. The Cut at which
            the QoI will be calculated. This is generally taken from the
            attribution slice's `to_cut`.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model. The
            intervention may also be a `feed_dict`.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`, keeping the same
            type as the input.
        """
        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        doi_cut = doi_cut if doi_cut else InputCut()

        attribution_tensors = self._get_layers(attribution_cut)
        to_tensors = self._get_layers(to_cut)
        doi_tensors = self._get_layers(doi_cut)

        feed_dict, _ = self._prepare_feed_dict_with_intervention(
            model_args, model_kwargs, intervention, doi_tensors)
        z_grads = []
        with self._graph.as_default():
            for z in attribution_tensors:
                gradient_tensor_key = (z, frozenset(to_tensors))
                if gradient_tensor_key in self._cached_gradient_tensors:
                    grads = self._cached_gradient_tensors[gradient_tensor_key]
                else:
                    Q = qoi(to_tensors[0]) if len(to_tensors) == 1 else qoi(
                        to_tensors)

                    grads = [B.gradient(q, z)[0] for q in Q] if isinstance(
                        Q, DATA_CONTAINER_TYPE) else B.gradient(Q, z)[0]
                    grads = grads[0] if isinstance(
                        grads,
                        DATA_CONTAINER_TYPE) and len(grads) == 1 else grads
                    grads = [attribution_cut.access_layer(g)
                             for g in grads] if isinstance(
                                 grads, DATA_CONTAINER_TYPE
                             ) else attribution_cut.access_layer(grads)
                    self._cached_gradient_tensors[gradient_tensor_key] = grads
                z_grads.append(grads)

        grad_flat = ModelWrapper._flatten(z_grads)

        gradients = [self._run_session(g, feed_dict) for g in grad_flat]

        gradients = ModelWrapper._unflatten(gradients, z_grads)
        return gradients[0] if len(gradients) == 1 else gradients
Example #20
    def fprop(self,
              model_args,
              model_kwargs={},
              doi_cut=None,
              to_cut=None,
              attribution_cut=None,
              intervention=None,
              return_tensor=False,
              input_timestep=None):
        """
        Forward-propagates the model.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        doi_cut: Cut, optional
            The Cut from which to begin propagation. The shape of
            `intervention` must match the input shape of this layer. This is
            usually used to apply distributions of interest (DoI).
        to_cut: Cut, optional
            The Cut to return output activation tensors for. If `None`, the
            final layer is assumed. Defaults to None.
        attribution_cut: Cut, optional
            A Cut to return activation tensors for. If `None`, the attribution
            layer's output is not returned.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model.
        return_tensor: bool, optional
            If `True`, return backend tensors from the hooks instead of
            converting them to `np.ndarray`s.
        input_timestep: int, optional
            Specifies a specific timestep at which to apply the DoI when using
            an RNN.

        Returns
        -------
        (list of backend.Tensor or np.ndarray)
            A list of output activations, keeping the same type as the input.
            If `attribution_cut` is supplied, the cut activations are also
            returned.
        """

        if doi_cut is None:
            doi_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        model_args = self._to_tensor(model_args)

        if intervention is None:
            intervention = model_args

        intervention = intervention if isinstance(
            intervention, DATA_CONTAINER_TYPE) else [intervention]
        intervention = self._to_tensor(intervention)

        if (isinstance(doi_cut, InputCut)):
            model_args = intervention

        else:
            doi_repeated_batch_size = intervention[0].shape[0]
            batched_model_args = []

            for val in model_args:
                doi_resolution = int(doi_repeated_batch_size / val.shape[0])
                tile_shape = [1 for _ in range(len(val.shape))]
                tile_shape[0] = doi_resolution
                repeat_shape = tuple(tile_shape)

                if isinstance(val, np.ndarray):
                    val = np.tile(val, repeat_shape)

                elif torch.is_tensor(val):
                    val = val.repeat(repeat_shape)

                batched_model_args.append(val)

            model_args = batched_model_args

        if (attribution_cut is not None):
            # Specify that we want to preserve gradient information.
            intervention = ModelWrapper._nested_apply(
                intervention,
                lambda intervention: intervention.requires_grad_(True))
            model_args = ModelWrapper._nested_apply(
                model_args, lambda model_args: model_args.requires_grad_(True))

        # Set up the intervention hookfn if we are starting from an intermediate
        # layer.
        if not isinstance(doi_cut, InputCut):
            # Define the hookfn.
            counter = 0

            def intervene_hookfn(self, inpt, outpt):
                nonlocal counter, input_timestep, doi_cut, intervention

                if input_timestep is None or input_timestep == counter:
                    # FIXME: generalize to multi-input layers. Currently can
                    #   only intervene on one layer.
                    inpt = inpt[0] if len(inpt) == 1 else inpt
                    if doi_cut.anchor == 'in':
                        ModelWrapper._nested_assign(inpt, intervention[0])
                    else:
                        ModelWrapper._nested_assign(outpt, intervention[0])

                counter += 1

            # Register according to the anchor.
            if doi_cut.anchor == 'in':
                in_handle = (self._get_layer(
                    doi_cut.name).register_forward_pre_hook(
                        partial(intervene_hookfn, outpt=None)))
            else:
                in_handle = (self._get_layer(
                    doi_cut.name).register_forward_hook(intervene_hookfn))

        # Collect the names and anchors of the layers we want to return.
        names_and_anchors = []

        self._add_cut_name_and_anchor(to_cut, names_and_anchors)

        if attribution_cut:
            self._add_cut_name_and_anchor(attribution_cut, names_and_anchors)

        # Create hookfns to extract the results from the specified layers.
        hooks = {}

        def get_hookfn(layer_name, anchor):
            def hookfn(self, inpt, outpt):
                nonlocal hooks, layer_name, anchor
                # FIXME: generalize to multi-input layers
                inpt = inpt[0] if len(inpt) == 1 else inpt

                if return_tensor:
                    if anchor == 'in':
                        hooks[layer_name] = inpt
                    else:
                        # FIXME: will not work for multi-branch outputs;
                        # needed to ignore the hidden states of RNNs.
                        outpt = outpt[0] if isinstance(outpt, tuple) else outpt
                        hooks[layer_name] = outpt

                else:
                    if anchor == 'in':
                        hooks[layer_name] = ModelWrapper._nested_apply(
                            inpt, B.as_array)
                    else:
                        outpt = outpt[0] if isinstance(outpt, tuple) else outpt
                        hooks[layer_name] = ModelWrapper._nested_apply(
                            outpt, B.as_array)

            return hookfn

        handles = [
            self._get_layer(name).register_forward_hook(
                get_hookfn(name, anchor)) for name, anchor in names_and_anchors
            if name is not None
        ]
        # Run the network.
        output = self._model(*model_args, **model_kwargs)
        if isinstance(output, tuple):
            output = output[0]

        if not isinstance(doi_cut, InputCut):
            # Clean up in handle.
            in_handle.remove()

        # Clean up out handles.
        for handle in handles:
            handle.remove()

        if attribution_cut:
            return [
                self._extract_outputs_from_hooks(to_cut, hooks, output,
                                                 model_args, return_tensor),
                self._extract_outputs_from_hooks(attribution_cut, hooks,
                                                 output, model_args,
                                                 return_tensor)
            ]
        else:
            return self._extract_outputs_from_hooks(to_cut, hooks, output,
                                                    model_args, return_tensor)
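
The tiling in the non-`InputCut` branch repeats each model argument so its batch dimension matches the DoI-expanded intervention; a minimal NumPy sketch of that arithmetic:

    import numpy as np

    batch, resolution = 2, 10                    # e.g., a 10-point LinearDoi
    intervention_rows = batch * resolution       # rows in the intervention
    val = np.zeros((batch, 3), dtype='float32')  # one model argument

    doi_resolution = intervention_rows // val.shape[0]  # -> 10
    tile_shape = [1] * val.ndim
    tile_shape[0] = doi_resolution
    tiled = np.tile(val, tuple(tile_shape))
    assert tiled.shape == (intervention_rows, 3)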
Example #21
    def fprop(self,
              model_args,
              model_kwargs={},
              doi_cut=None,
              to_cut=None,
              attribution_cut=None,
              intervention=None):
        """
        Forward-propagates the model.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model. This
            should represent the instances to obtain attributions for, assumed
            to be a *batched* input. If `self.model` supports evaluation on
            *data tensors*, the appropriate tensor type may be used (e.g.,
            PyTorch models may accept PyTorch tensors in addition to
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        doi_cut: Cut, optional
            The Cut from which to begin propagation. The shape of
            `intervention` must match the input shape of this layer. This is
            usually used to apply distributions of interest (DoI).
        to_cut: Cut, optional
            The Cut to return output activation tensors for. If `None`, the
            final layer is assumed. Defaults to None.
        attribution_cut: Cut, optional
            A Cut to return activation tensors for. If `None`, the attribution
            layer's output is not returned.
        intervention: backend.Tensor or np.ndarray
            Input tensor to propagate through the model. If an np.ndarray, it
            will be converted to a tensor on the same device as the model.

        Returns
        -------
        (list of backend.Tensor or np.ndarray)
            A list of output activations, preferring to stay in the same
            format as the input. If `attribution_cut` is supplied, the cut
            activations are also returned.
        """
        if not self._eager:
            return super().fprop(model_args, model_kwargs, doi_cut, to_cut,
                                 attribution_cut, intervention)

        if doi_cut is None:
            doi_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        return_numpy = True

        if intervention is not None:
            if not isinstance(intervention, DATA_CONTAINER_TYPE):
                intervention = [intervention]

            # We return a numpy array if we were given a numpy array; otherwise
            # we will let the returned values remain data tensors.
            return_numpy = isinstance(intervention[0], np.ndarray)

            # Convert `x` to a data tensor if it isn't already.
            if return_numpy:
                intervention = ModelWrapper._nested_apply(
                    intervention, tf.constant)

        try:
            if (intervention):
                # Get Inputs and batch then the same as DoI resolution
                doi_repeated_batch_size = intervention[0].shape[0]
                batched_model_args = []
                for val in model_args:
                    if isinstance(val, np.ndarray):
                        doi_resolution = int(doi_repeated_batch_size /
                                             val.shape[0])
                        tile_shape = [1] * len(val.shape)
                        tile_shape[0] = doi_resolution
                        val = np.tile(val, tuple(tile_shape))
                    elif tf.is_tensor(val):
                        doi_resolution = int(doi_repeated_batch_size /
                                             val.shape[0])
                        val = tf.repeat(val, doi_resolution, axis=0)
                    batched_model_args.append(val)
                model_args = batched_model_args

                if not isinstance(doi_cut, InputCut):
                    from_layers = (self._get_logit_layer() if isinstance(
                        doi_cut, LogitCut) else self._get_output_layer()
                                   if isinstance(doi_cut, OutputCut) else
                                   self._get_layers_by_name(doi_cut.name))

                    for layer, x_i in zip(from_layers, intervention):
                        if doi_cut.anchor == 'in':
                            layer.input_intervention = lambda _: x_i
                        else:
                            layer.output_intervention = lambda _: x_i
                else:
                    arg_wrapped_list = False
                    # Take care of the Keras Module case where args is a tuple
                    # containing a list of inputs corresponding to
                    # `model._inputs`. That tuple would have been unwrapped,
                    # since the logic operates on the list of inputs, so it
                    # needs to be re-wrapped in a tuple for the model call.
                    if (isinstance(model_args, DATA_CONTAINER_TYPE) and
                            isinstance(model_args[0], DATA_CONTAINER_TYPE)):

                        arg_wrapped_list = True

                    model_args = intervention

                    if arg_wrapped_list:
                        model_args = (model_args, )

            # Get the output from the "to layers," and possibly the latent
            # layers.
            def retrieve_index(i, results, anchor):
                def retrieve(inputs, output):
                    if anchor == 'in':
                        results[i] = (inputs[0] if
                                      (isinstance(inputs, DATA_CONTAINER_TYPE)
                                       and len(inputs) == 1) else inputs)
                    else:
                        results[i] = (output[0] if
                                      (isinstance(output, DATA_CONTAINER_TYPE)
                                       and len(output) == 1) else output)

                return retrieve

            if isinstance(to_cut, InputCut):
                results = model_args

            else:
                to_layers = (self._get_logit_layer() if (isinstance(
                    to_cut, LogitCut)) else self._get_output_layer() if
                             (isinstance(to_cut, OutputCut)) else
                             self._get_layers_by_name(to_cut.name))

                results = [None for _ in to_layers]

                for i, layer in enumerate(to_layers):
                    layer.retrieve_functions.append(
                        retrieve_index(i, results, to_cut.anchor))

            if attribution_cut:
                if isinstance(attribution_cut, InputCut):
                    # The attribution must be the watched tensor given from
                    # `qoi_bprop`.
                    attribution_results = intervention

                else:
                    attribution_layers = (
                        self._get_logit_layer() if
                        (isinstance(attribution_cut,
                                    LogitCut)) else self._get_output_layer() if
                        (isinstance(attribution_cut, OutputCut)) else
                        self._get_layers_by_name(attribution_cut.name))

                    attribution_results = [None for _ in attribution_layers]

                    for i, layer in enumerate(attribution_layers):
                        if self._is_input_layer(layer):
                            # Input layers don't end up calling the hook, so we
                            # have to get their output manually.
                            attribution_results[i] = intervention[
                                self._input_layer_index(layer)]

                        else:
                            layer.retrieve_functions.append(
                                retrieve_index(i, attribution_results,
                                               attribution_cut.anchor))

            # Run a point.
            self._model(*model_args, **model_kwargs)

        finally:
            # Clear the hooks after running the model so that `fprop` doesn't
            # leave the model in an altered state.
            self._clear_hooks()

        if return_numpy:
            results = ModelWrapper._nested_apply(
                results, lambda t: t.numpy()
                if not isinstance(t, np.ndarray) else t)

        return (results, attribution_results) if attribution_cut else results
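
A hedged end-to-end sketch of calling this eager `fprop` (the `wrapper` name and the `'dense_1'` layer name are placeholders; the cut classes are as used throughout these examples):

    x = np.random.uniform(size=(4, 5)).astype('float32')

    # Read activations at an intermediate layer...
    acts = wrapper.fprop((x,), to_cut=Cut('dense_1'))

    # ...then restart propagation there with those activations (or any
    # intervention of the same shape) to get the final output.
    out = wrapper.fprop(
        (x,),
        doi_cut=Cut('dense_1'),
        intervention=acts,
        to_cut=OutputCut())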