Beispiel #1
0
    def __get_slice(slice_arg):
        """
        Helper function to get a `Slice` object from more user-friendly
        primitive arguments.

        Parameters:
            slice_arg:
                One of the following:

                - A `Slice`, which is returned unchanged.
                - A `Cut`, `int`, `str`, or `None`, which is converted with
                  `InternalInfluence.__get_cut` and used as the first cut of
                  the slice; the second cut is `OutputCut()`.
                - A `DATA_CONTAINER_TYPE` of length 2, whose two elements are
                  each converted with `InternalInfluence.__get_cut` and used
                  as the first and second cut of the slice, respectively.

        Returns:
            The `Slice` described by `slice_arg`.

        Raises:
            ValueError:
                If `slice_arg` is a `DATA_CONTAINER_TYPE` whose length is not
                2, or if it is of none of the accepted types.
        """
        if isinstance(slice_arg, Slice):
            # We are already given a Slice, so return it.
            return slice_arg

        # The trailing `== 0` test is kept from the original so that any
        # non-int value comparing equal to zero is still accepted here.
        if (isinstance(slice_arg, (Cut, int, str)) or slice_arg is None or
                slice_arg == 0):
            # If we receive a Cut, we take it to be the Cut of the start layer.
            return Slice(InternalInfluence.__get_cut(slice_arg), OutputCut())

        if isinstance(slice_arg, DATA_CONTAINER_TYPE):
            # If we receive a DATA_CONTAINER_TYPE, we take it to be the start
            # and end layer of the slice.
            #
            # Compare the length by value: identity comparison (`is`) against
            # an int literal only works by accident of small-int caching.
            if len(slice_arg) == 2:
                return Slice(
                    InternalInfluence.__get_cut(slice_arg[0]),
                    InternalInfluence.__get_cut(slice_arg[1]))

            raise ValueError(
                'Tuple or list argument for `cuts` must have length 2')

        raise ValueError('Unrecognized argument type for `cuts`')
Beispiel #2
0
    def __init__(
            self, model: ModelWrapper, baseline=None, resolution: int = 50):
        """
        Set up the attribution method by configuring the parent class with an
        output cut, the `'max'` setting, and a linear distribution of
        interest.

        Parameters:
            model:
                Model for which attributions are calculated.

            baseline:
                The baseline to interpolate from. Must be same shape as the
                input. If `None` is given, the zero vector in the appropriate
                shape will be used.

            resolution:
                Number of points to use in the approximation. A higher
                resolution is more computationally expensive, but gives a
                better approximation of the mathematical formula this
                attribution method represents.
        """
        # Build the fixed pieces in the same order they are handed to the
        # parent initializer.
        output_cut = OutputCut()
        linear_doi = LinearDoi(baseline, resolution)

        super().__init__(
            model, output_cut, 'max', linear_doi, multiply_activation=True)
Beispiel #3
0
    def fprop(self,
              model_args,
              model_kwargs={},
              doi_cut=None,
              to_cut=None,
              attribution_cut=None,
              intervention=None,
              return_tensor=False,
              input_timestep=None):
        """
        fprop Forward propagate the model

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for,
            assumed to be a *batched* input. if `self.model` supports
            evaluation on *data tensors*, the appropriate tensor type may be
            used (e.g., Pytorch models may accept Pytorch tensors in addition
            to `np.ndarray`s). The shape of the inputs must match the input
            shape of `self.model`. `model_kwargs` is only read, never
            modified.
        doi_cut: Cut, optional
            The Cut from which to begin propagation. The shape of
            `intervention` must match the input shape of this layer. This is
            usually used to apply distributions of interest (DoI). If `None`,
            `InputCut()` is used.
        to_cut : Cut, optional
            The Cut to return output activation tensors for. If `None`,
            assumed to be just the final layer. By default None
        attribution_cut : Cut, optional
            A Cut to return activation tensors for. If `None`, the
            attribution layer output is not returned.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model. If an np.array,
            will be converted to a tensor on the same device as the model.
            If `None`, `model_args` is used.
        return_tensor: bool, optional
            If True, the activations captured by the hooks are stored as-is;
            otherwise they are converted with `B.as_array`.
        input_timestep: int, optional
            Specifies a specific timestep to apply the DoI if using an RNN.
            The intervention is applied only on the call of the hooked layer
            whose zero-based call index equals this value; if `None`, it is
            applied on every call.

        Returns
        -------
        (list of backend.Tensor or np.ndarray)
            The activations extracted for `to_cut`. If `attribution_cut` is
            supplied, a two-element list is returned instead: the `to_cut`
            activations followed by the `attribution_cut` activations.
        """

        if doi_cut is None:
            doi_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        model_args = self._to_tensor(model_args)

        if intervention is None:
            intervention = model_args

        if not isinstance(intervention, DATA_CONTAINER_TYPE):
            intervention = [intervention]
        intervention = self._to_tensor(intervention)

        starts_at_input = isinstance(doi_cut, InputCut)
        # Use one consistent test for "was an attribution cut requested" so
        # that gradient tracking and the shape of the return value agree.
        wants_attribution = attribution_cut is not None

        if starts_at_input:
            model_args = intervention

        else:
            # The intervention's batch dimension is a multiple of the original
            # batch size; repeat each model argument along its first axis so
            # the batch sizes line up.
            doi_repeated_batch_size = intervention[0].shape[0]
            batched_model_args = []

            for val in model_args:
                doi_resolution = doi_repeated_batch_size // val.shape[0]
                repeat_shape = (doi_resolution,) + (1,) * (len(val.shape) - 1)

                if isinstance(val, np.ndarray):
                    val = np.tile(val, repeat_shape)

                elif torch.is_tensor(val):
                    val = val.repeat(repeat_shape)

                batched_model_args.append(val)

            model_args = batched_model_args

        if wants_attribution:
            # Specify that we want to preserve gradient information.
            intervention = ModelWrapper._nested_apply(
                intervention, lambda t: t.requires_grad_(True))
            model_args = ModelWrapper._nested_apply(
                model_args, lambda t: t.requires_grad_(True))

        # Results captured by the extraction hooks, keyed by layer name.
        hooks = {}

        def get_hookfn(layer_name, anchor):
            def hookfn(module, inpt, outpt):
                # FIXME: generalize to multi-input layers
                inpt = inpt[0] if len(inpt) == 1 else inpt

                if anchor == 'in':
                    captured = inpt
                else:
                    # FIXME : will not work for multibranch outputs
                    # needed to ignore hidden states of RNNs
                    captured = outpt[0] if isinstance(outpt, tuple) else outpt

                if return_tensor:
                    hooks[layer_name] = captured
                else:
                    hooks[layer_name] = ModelWrapper._nested_apply(
                        captured, B.as_array)

            return hookfn

        # Every hook registered below is recorded here so that it is removed
        # even if registration of a later hook or the forward pass raises.
        handles = []

        try:
            # Set up the intervention hookfn if we are starting from an
            # intermediate layer.
            if not starts_at_input:
                counter = 0

                def intervene_hookfn(module, inpt, outpt):
                    nonlocal counter

                    if input_timestep is None or input_timestep == counter:
                        # FIXME: generalize to multi-input layers. Currently
                        #   can only intervene on one layer.
                        inpt = inpt[0] if len(inpt) == 1 else inpt
                        if doi_cut.anchor == 'in':
                            ModelWrapper._nested_assign(inpt, intervention[0])
                        else:
                            ModelWrapper._nested_assign(outpt, intervention[0])

                    counter += 1

                # Register according to the anchor. Pre-hooks are called
                # without an output, so `outpt` is bound to None for them.
                doi_layer = self._get_layer(doi_cut.name)
                if doi_cut.anchor == 'in':
                    handles.append(
                        doi_layer.register_forward_pre_hook(
                            partial(intervene_hookfn, outpt=None)))
                else:
                    handles.append(
                        doi_layer.register_forward_hook(intervene_hookfn))

            # Collect the names and anchors of the layers we want to return.
            names_and_anchors = []
            self._add_cut_name_and_anchor(to_cut, names_and_anchors)
            if wants_attribution:
                self._add_cut_name_and_anchor(
                    attribution_cut, names_and_anchors)

            # Create hookfns to extract the results from the specified layers.
            for name, anchor in names_and_anchors:
                if name is not None:
                    handles.append(
                        self._get_layer(name).register_forward_hook(
                            get_hookfn(name, anchor)))

            # Run the network. Keyword arguments must be expanded with `**`;
            # a single `*` would pass the dict's keys as positional arguments.
            output = self._model(*model_args, **model_kwargs)

        finally:
            # Clean up the intervention handle (if any) and the out handles.
            for handle in handles:
                handle.remove()

        if isinstance(output, tuple):
            output = output[0]

        to_outputs = self._extract_outputs_from_hooks(
            to_cut, hooks, output, model_args, return_tensor)

        if not wants_attribution:
            return to_outputs

        attribution_outputs = self._extract_outputs_from_hooks(
            attribution_cut, hooks, output, model_args, return_tensor)

        return [to_outputs, attribution_outputs]
Beispiel #4
0
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        qoi_bprop Run the model from the from_layer to the qoi layer
            and give the gradients w.r.t `attribution_cut`

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for,
            assumed to be a *batched* input. if `self.model` supports
            evaluation on *data tensors*, the appropriate tensor type may be
            used (e.g., Pytorch models may accept Pytorch tensors in addition
            to `np.ndarray`s). The shape of the inputs must match the input
            shape of `self.model`.
        qoi: a Quantity of Interest
            Callable applied to the `to_cut` activations. This method will
            accumulate all gradients of the qoi w.r.t `attribution_cut`.
        doi_cut: Cut, optional
            Cut from which to begin propagation; `InputCut()` is used when
            this is `None` (or otherwise falsy). The shape of `intervention`
            must match the output shape of this layer.
        to_cut: Cut, optional
            The Cut in which qoi will be calculated; `OutputCut()` is used
            when this is `None`. This is generally taken from the
            attribution slice's to_cut.
        attribution_cut: Cut, optional
            The Cut in which attribution will be calculated; `InputCut()` is
            used when this is `None`. This is generally taken from the
            attribution slice's attribution_cut.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model. If an np.array,
            will be converted to a tensor on the same device as the model.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`, converted with
            `B.as_array`. One entry is produced per element yielded by the
            attribution activations; if there is exactly one entry it is
            returned on its own rather than inside a list.
        """
        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        # Forward pass, keeping tensors (not arrays) so gradients can be
        # taken through them.
        y, zs = self.fprop(
            model_args,
            model_kwargs,
            doi_cut=doi_cut if doi_cut else InputCut(),
            to_cut=to_cut,
            attribution_cut=attribution_cut,
            intervention=intervention,
            return_tensor=True)

        y = to_cut.access_layer(y)

        grads_list = []
        for z in zs:
            z_flat = ModelWrapper._flatten(z)
            qoi_out = qoi(y)

            if isinstance(qoi_out, DATA_CONTAINER_TYPE):
                # The QoI produced several outputs: run every stage once per
                # output, finishing each stage before starting the next.
                grads_flat = [
                    B.gradient(B.sum(q), z_flat) for q in qoi_out
                ]
                grads = [
                    ModelWrapper._unflatten(g, z, count=[0])
                    for g in grads_flat
                ]
                grads = [attribution_cut.access_layer(g) for g in grads]
                grads = [B.as_array(g) for g in grads]

            else:
                # Single QoI output: the same four stages, applied directly.
                grads_flat = B.gradient(B.sum(qoi_out), z_flat)
                grads = ModelWrapper._unflatten(grads_flat, z, count=[0])
                grads = attribution_cut.access_layer(grads)
                grads = B.as_array(grads)

            grads_list.append(grads)

        del y  # TODO: garbage collection

        if len(grads_list) == 1:
            return grads_list[0]

        return grads_list
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        qoi_bprop Run the model from the from_layer to the qoi layer
            and give the gradients w.r.t `attribution_cut`

        Gradient tensors are built inside `self._graph` and stored in
        `self._cached_gradient_tensors`; they are then evaluated with
        `self._run_session`.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for,
            assumed to be a *batched* input. if `self.model` supports
            evaluation on *data tensors*, the appropriate tensor type may be
            used (e.g., Pytorch models may accept Pytorch tensors in addition
            to `np.ndarray`s). The shape of the inputs must match the input
            shape of `self.model`.
        qoi: a Quantity of Interest
            Callable applied to the `to_cut` tensors. This method will
            accumulate all gradients of the qoi w.r.t `attribution_cut`.
        doi_cut: Cut, optional
            Cut from which to begin propagation; `InputCut()` is used when
            this is `None` (or otherwise falsy). The shape of `intervention`
            must match the output shape of this layer.
        to_cut: Cut, optional
            The Cut in which qoi will be calculated; `OutputCut()` is used
            when this is `None`. This is generally taken from the
            attribution slice's to_cut.
        attribution_cut: Cut, optional
            The Cut in which attribution will be calculated; `InputCut()` is
            used when this is `None`. This is generally taken from the
            attribution slice's attribution_cut.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model. If an np.array, will
            be converted to a tensor on the same device as the model.
            intervention can also be a feed_dict

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`. If the result
            has exactly one element, that element is returned on its own.
        """
        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()
        if not doi_cut:
            doi_cut = InputCut()

        attribution_tensors = self._get_layers(attribution_cut)
        to_tensors = self._get_layers(to_cut)
        doi_tensors = self._get_layers(doi_cut)

        feed_dict, _ = self._prepare_feed_dict_with_intervention(
            model_args, model_kwargs, intervention, doi_tensors)

        z_grads = []
        with self._graph.as_default():
            for z in attribution_tensors:
                # NOTE(review): the cache key is built from the attribution
                # tensor and the `to_cut` tensors only; `qoi` is not part of
                # it -- confirm that is intended.
                cache_key = (z, frozenset(to_tensors))

                if cache_key not in self._cached_gradient_tensors:
                    if len(to_tensors) == 1:
                        Q = qoi(to_tensors[0])
                    else:
                        Q = qoi(to_tensors)

                    # One gradient tensor per QoI output.
                    if isinstance(Q, DATA_CONTAINER_TYPE):
                        grads = [B.gradient(q, z)[0] for q in Q]
                    else:
                        grads = B.gradient(Q, z)[0]

                    # Unwrap a container holding a single gradient.
                    if (isinstance(grads, DATA_CONTAINER_TYPE) and
                            len(grads) == 1):
                        grads = grads[0]

                    if isinstance(grads, DATA_CONTAINER_TYPE):
                        grads = [
                            attribution_cut.access_layer(g) for g in grads
                        ]
                    else:
                        grads = attribution_cut.access_layer(grads)

                    self._cached_gradient_tensors[cache_key] = grads

                else:
                    grads = self._cached_gradient_tensors[cache_key]

                z_grads.append(grads)

        # Evaluate each gradient tensor separately, then restore the nesting
        # of `z_grads`.
        gradients = []
        for grad_tensor in ModelWrapper._flatten(z_grads):
            gradients.append(self._run_session(grad_tensor, feed_dict))

        gradients = ModelWrapper._unflatten(gradients, z_grads)

        if len(gradients) == 1:
            return gradients[0]

        return gradients
    def fprop(self,
              model_args,
              model_kwargs={},
              doi_cut=None,
              to_cut=None,
              attribution_cut=None,
              intervention=None):
        """
        fprop Forward propagate the model

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for,
            assumed to be a *batched* input. if `self.model` supports
            evaluation on *data tensors*, the appropriate tensor type may be
            used (e.g., Pytorch models may accept Pytorch tensors in addition
            to `np.ndarray`s). The shape of the inputs must match the input
            shape of `self.model`.
        doi_cut: Cut, optional
            The Cut from which to begin propagation; `InputCut()` is used
            when this is `None`. The shape of `intervention` must match the
            input shape of this layer. This is usually used to apply
            distributions of interest (DoI).
        to_cut : Cut, optional
            The Cut to return output activation tensors for. If `None`,
            assumed to be just the final layer. By default None
        attribution_cut : Cut, optional
            Accepted for signature compatibility; it is not read by this
            implementation.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model. If an np.array, will
            be converted to a tensor on the same device as the model.
            Intervention can also be a `feed_dict`.

        Returns
        -------
        (list of backend.Tensor or np.ndarray)
            One value per `to_cut` tensor, in the order of those tensors.
        """

        if doi_cut is None:
            doi_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        doi_tensors = self._get_layers(doi_cut)
        to_tensors = self._get_layers(to_cut)

        feed_dict, intervention = self._prepare_feed_dict_with_intervention(
            model_args, model_kwargs, intervention, doi_tensors)

        # Tensorflow doesn't allow you to make a function that returns the
        # same tensor as it takes in. Thus, we have to have a special case for
        # the identity function. Any tensors that are both in `doi_tensors`
        # and `to_tensors` cannot be computed via a `keras.backend.function`
        # and thus need to be taken from the input, `x`.
        #
        # Maps an index into `to_tensors` to the index of the matching
        # tensor in `doi_tensors` (the last match wins).
        identity_map = {}
        for to_index, to_tensor in enumerate(to_tensors):
            for doi_index, from_tensor in enumerate(doi_tensors):
                if to_tensor == from_tensor:
                    identity_map[to_index] = doi_index

        non_identity_to_tensors = []
        for to_index, to_tensor in enumerate(to_tensors):
            if to_index not in identity_map:
                non_identity_to_tensors.append(to_tensor)

        # Compute the output values of `to_tensors` unless all `to_tensor`s
        # were also `doi_tensors`.
        out_vals = (
            self._run_session(non_identity_to_tensors, feed_dict)
            if non_identity_to_tensors else [])

        # For any `to_tensor`s that were also `from_tensor`s, insert the
        # corresponding concrete input value from `x` in the output's place.
        # Ascending order keeps earlier insertions from shifting later ones.
        for to_index in sorted(identity_map):
            doi_index = identity_map[to_index]
            out_vals.insert(to_index, intervention[doi_index])

        return out_vals
Beispiel #7
0
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        qoi_bprop Run the model from the from_layer to the qoi layer
            and give the gradients w.r.t `attribution_cut`

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for,
            assumed to be a *batched* input. if `self.model` supports
            evaluation on *data tensors*, the appropriate tensor type may be
            used (e.g., Pytorch models may accept Pytorch tensors in addition
            to `np.ndarray`s). The shape of the inputs must match the input
            shape of `self.model`. `model_kwargs` is not read by this
            implementation.
        qoi: a Quantity of Interest
            Callable applied to the `to_cut` tensors. This method will
            accumulate all gradients of the qoi w.r.t `attribution_cut`
        doi_cut: Cut, optional
            Cut from which to begin propagation; `InputCut()` is used when
            this is `None` (or otherwise falsy). The shape of `intervention`
            must match the output shape of this layer.
        to_cut: Cut, optional
            The Cut in which qoi will be calculated; `OutputCut()` is used
            when this is `None`. This is generally taken from the
            attribution slice's `to_cut`.
        attribution_cut: Cut, optional
            The Cut in which attribution will be calculated; `InputCut()` is
            used when this is `None`. This is generally taken from the
            attribution slice's attribution_cut.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model; `model_args` is
            used when this is `None`. If an np.array, will be converted to a
            tensor on the same device as the model.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`. If the result
            has exactly one element, that element is returned on its own.
        """

        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()
        if not doi_cut:
            doi_cut = InputCut()

        attribution_tensors = self._get_layers(attribution_cut)
        to_tensors = self._get_layers(to_cut)
        doi_tensors = self._get_layers(doi_cut)

        if intervention is None:
            intervention = model_args
        if not isinstance(intervention, DATA_CONTAINER_TYPE):
            intervention = [intervention]

        def evaluate_gradient(q):
            # Build a backend function from the DoI tensors to the gradient
            # of `q` w.r.t. the attribution tensors, and run it right away
            # on the intervention.
            gradient_fn = keras.backend.function(
                doi_tensors, B.gradient(q, attribution_tensors))
            return gradient_fn(intervention)

        if len(to_tensors) == 1:
            Q = qoi(to_tensors[0])
        else:
            Q = qoi(to_tensors)

        if isinstance(Q, DATA_CONTAINER_TYPE):
            gradients = [evaluate_gradient(q) for q in Q]
        else:
            gradients = evaluate_gradient(Q)

        if len(gradients) == 1:
            return gradients[0]

        return gradients
Beispiel #8
0
    def qoi_bprop(self,
                  qoi,
                  model_args,
                  model_kwargs={},
                  doi_cut=None,
                  to_cut=None,
                  attribution_cut=None,
                  intervention=None):
        """
        qoi_bprop Run the model from the from_layer to the qoi layer
            and give the gradients w.r.t `attribution_cut`

        When `self._eager` is false the call is delegated unchanged to the
        parent class; otherwise the gradients are computed with a
        `tf.GradientTape`.

        Parameters
        ----------
        model_args, model_kwargs:
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for,
            assumed to be a *batched* input. if `self.model` supports
            evaluation on *data tensors*, the appropriate tensor type may be
            used (e.g., Pytorch models may accept Pytorch tensors in addition
            to `np.ndarray`s). The shape of the inputs must match the input
            shape of `self.model`.
        qoi: a Quantity of Interest
            Callable applied to the `to_cut` outputs. This method will
            accumulate all gradients of the qoi w.r.t `attribution_cut`
        doi_cut: Cut, optional
            Cut from which to begin propagation; `InputCut()` is passed to
            `fprop` when this is `None` (or otherwise falsy). The shape of
            `intervention` must match the output shape of this layer.
        to_cut: Cut, optional
            The Cut in which qoi will be calculated; `OutputCut()` is used
            when this is `None`. This is generally taken from the
            attribution slice's to_cut.
        attribution_cut: Cut, optional
            The Cut in which attribution will be calculated; `InputCut()` is
            used when this is `None`. This is generally taken from the
            attribution slice's attribution_cut.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model; `model_args` is
            used when this is `None`. If an np.array, it is converted with
            `tf.constant` before being watched by the tape.

        Returns
        -------
        (backend.Tensor or np.ndarray)
            The gradients of `qoi` w.r.t. `attribution_cut`. They are
            converted with `B.as_array` when the intervention was given as
            `np.ndarray`, and left as returned by the tape otherwise. A
            container holding a single element is unwrapped.
        """
        if intervention is None:
            intervention = model_args

        # Graph (non-eager) mode is handled entirely by the parent class.
        if not self._eager:
            return super().qoi_bprop(qoi, model_args, model_kwargs, doi_cut,
                                     to_cut, attribution_cut, intervention)

        if attribution_cut is None:
            attribution_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        return_numpy = True

        # The tape is persistent because `tape.gradient` may be called once
        # per QoI output below.
        with tf.GradientTape(persistent=True) as tape:

            intervention = intervention if isinstance(
                intervention, DATA_CONTAINER_TYPE) else [intervention]
            # We return a numpy array if we were given a numpy array; otherwise
            # we will let the returned values remain data tensors.
            return_numpy = isinstance(intervention, np.ndarray) or isinstance(
                intervention[0], np.ndarray)

            # Convert `intervention` to a data tensor if it isn't already.

            if return_numpy:
                intervention = [
                    ModelWrapper._nested_apply(x_i, tf.constant)
                    for x_i in intervention
                ]

            # Explicitly watch every intervention tensor on the tape.
            for x_i in intervention:
                ModelWrapper._nested_apply(x_i, tape.watch)

            # The forward pass runs inside the tape context so that it is
            # recorded for differentiation.
            outputs, attribution_features = self.fprop(
                model_args,
                model_kwargs,
                doi_cut=doi_cut if doi_cut else InputCut(),
                to_cut=to_cut,
                attribution_cut=attribution_cut,
                intervention=intervention)
            # Unwrap one level when `fprop` returned a nested container.
            if isinstance(outputs, DATA_CONTAINER_TYPE) and isinstance(
                    outputs[0], DATA_CONTAINER_TYPE):
                outputs = outputs[0]

            Q = qoi(outputs[0]) if len(outputs) == 1 else qoi(outputs)
            # A single-element container of QoI outputs is reduced with
            # `B.sum`, so the non-container branch below is taken.
            if isinstance(Q, DATA_CONTAINER_TYPE) and len(Q) == 1:
                Q = B.sum(Q)

        # One gradient per QoI output, or a single gradient when `Q` is not a
        # container.
        grads = [tape.gradient(q, attribution_features) for q in Q
                 ] if isinstance(Q, DATA_CONTAINER_TYPE) else tape.gradient(
                     Q, attribution_features)

        # Unwrap a container that holds a single gradient.
        grads = grads[0] if isinstance(
            grads, DATA_CONTAINER_TYPE) and len(grads) == 1 else grads

        grads = [attribution_cut.access_layer(g) for g in grads] if isinstance(
            grads,
            DATA_CONTAINER_TYPE) else attribution_cut.access_layer(grads)

        # Drop the reference to the persistent tape once all gradients have
        # been taken.
        del tape

        if return_numpy:
            grads = [ModelWrapper._nested_apply(g, B.as_array)
                     for g in grads] if isinstance(
                         grads,
                         DATA_CONTAINER_TYPE) else ModelWrapper._nested_apply(
                             grads, B.as_array)

        return grads[0] if isinstance(
            grads, DATA_CONTAINER_TYPE) and len(grads) == 1 else grads
Beispiel #9
0
    def fprop(self,
              model_args,
              model_kwargs={},
              doi_cut=None,
              to_cut=None,
              attribution_cut=None,
              intervention=None):
        """
        fprop Forward propagate the model

        Parameters
        ----------
        model_args, model_kwargs: 
            The args and kwargs given to the call method of a model.
            This should represent the instances to obtain attributions for, 
            assumed to be a *batched* input. if `self.model` supports evaluation 
            on *data tensors*, the  appropriate tensor type may be used (e.g., 
            Pytorch models may accept Pytorch tensors in addition to 
            `np.ndarray`s). The shape of the inputs must match the input shape
            of `self.model`.
        doi_cut: Cut, optional
            The Cut from which to begin propagation. The shape of `intervention`
            must match the input shape of this layer. This is usually used to 
            apply distributions of interest (DoI)
        to_cut : Cut, optional
            The Cut to return output activation tensors for. If `None`,
            assumed to be just the final layer. By default None
        attribution_cut : Cut, optional
            An Cut to return activation tensors for. If `None` 
            attributions layer output is not returned.
        intervention : backend.Tensor or np.array
            Input tensor to propagate through the model. If an np.array, 
            will be converted to a tensor on the same device as the model.

        Returns
        -------
        (list of backend.Tensor or np.ndarray)
            A list of output activations are returned, preferring to stay in the
            same format as the input. If `attribution_cut` is supplied, also 
            return the cut activations.
        """
        if not self._eager:
            return super().fprop(model_args, model_kwargs, doi_cut, to_cut,
                                 attribution_cut, intervention)

        if doi_cut is None:
            doi_cut = InputCut()
        if to_cut is None:
            to_cut = OutputCut()

        return_numpy = True

        if intervention is not None:
            if not isinstance(intervention, DATA_CONTAINER_TYPE):
                intervention = [intervention]

            # We return a numpy array if we were given a numpy array; otherwise
            # we will let the returned values remain data tensors.
            return_numpy = isinstance(intervention[0], np.ndarray)

            # Convert `x` to a data tensor if it isn't already.
            if return_numpy:
                intervention = ModelWrapper._nested_apply(
                    intervention, tf.constant)

        try:
            if (intervention):
                # Tile each model arg along the batch axis so that its batch
                # size matches that of the DoI-repeated intervention.
                doi_repeated_batch_size = intervention[0].shape[0]
                batched_model_args = []
                for val in model_args:
                    if isinstance(val, np.ndarray):
                        doi_resolution = int(doi_repeated_batch_size /
                                             val.shape[0])
                        tile_shape = [1] * len(val.shape)
                        tile_shape[0] = doi_resolution
                        val = np.tile(val, tuple(tile_shape))
                    elif tf.is_tensor(val):
                        doi_resolution = int(doi_repeated_batch_size /
                                             val.shape[0])
                        val = tf.repeat(val, doi_resolution, axis=0)
                    batched_model_args.append(val)
                model_args = batched_model_args

                if not isinstance(doi_cut, InputCut):
                    from_layers = (self._get_logit_layer() if isinstance(
                        doi_cut, LogitCut) else self._get_output_layer()
                                   if isinstance(doi_cut, OutputCut) else
                                   self._get_layers_by_name(doi_cut.name))

                    for layer, x_i in zip(from_layers, intervention):
                        if doi_cut.anchor == 'in':
                            layer.input_intervention = lambda _: x_i
                        else:
                            layer.output_intervention = lambda _: x_i
                else:
                    arg_wrapped_list = False
                    # Take care of the Keras Module case where args is a tuple
                    # of list of inputs corresponding to `model._inputs`. This
                    # would have gotten unwrapped as the logic operates on the
                    # list of inputs. so needs to be re-wrapped in tuple for the
                    # model arg execution.
                    if (isinstance(model_args, DATA_CONTAINER_TYPE) and
                            isinstance(model_args[0], DATA_CONTAINER_TYPE)):

                        arg_wrapped_list = True

                    model_args = intervention

                    if arg_wrapped_list:
                        model_args = (model_args, )

            # Get the output from the "to layers," and possibly the latent
            # layers.
            def retrieve_index(i, results, anchor):
                def retrieve(inputs, output):
                    if anchor == 'in':
                        results[i] = (inputs[0] if
                                      (isinstance(inputs, DATA_CONTAINER_TYPE)
                                       and len(inputs) == 1) else inputs)
                    else:
                        results[i] = (output[0] if
                                      (isinstance(output, DATA_CONTAINER_TYPE)
                                       and len(output) == 1) else output)

                return retrieve

            if isinstance(to_cut, InputCut):
                results = model_args

            else:
                to_layers = (self._get_logit_layer() if (isinstance(
                    to_cut, LogitCut)) else self._get_output_layer() if
                             (isinstance(to_cut, OutputCut)) else
                             self._get_layers_by_name(to_cut.name))

                results = [None for _ in to_layers]

                for i, layer in enumerate(to_layers):
                    layer.retrieve_functions.append(
                        retrieve_index(i, results, to_cut.anchor))

            if attribution_cut:
                if isinstance(attribution_cut, InputCut):
                    # The attribution must be the watched tensor given from
                    # `qoi_bprop`.
                    attribution_results = intervention

                else:
                    attribution_layers = (
                        self._get_logit_layer() if
                        (isinstance(attribution_cut,
                                    LogitCut)) else self._get_output_layer() if
                        (isinstance(attribution_cut, OutputCut)) else
                        self._get_layers_by_name(attribution_cut.name))

                    attribution_results = [None for _ in attribution_layers]

                    for i, layer in enumerate(attribution_layers):
                        if self._is_input_layer(layer):
                            # Input layers don't end up calling the hook, so we
                            # have to get their output manually.
                            attribution_results[i] = intervention[
                                self._input_layer_index(layer)]

                        else:
                            layer.retrieve_functions.append(
                                retrieve_index(i, attribution_results,
                                               attribution_cut.anchor))

            # Run a point.
            self._model(*model_args, **model_kwargs)

        finally:
            # Clear the hooks after running the model so that `fprop` doesn't
            # leave the model in an altered state.
            self._clear_hooks()

        if return_numpy:
            results = ModelWrapper._nested_apply(
                results, lambda t: t.numpy()
                if not isinstance(t, np.ndarray) else t)

        return (results, attribution_results) if attribution_cut else results