Example #1
    def _get_single_output_relevance(self, layer, output):
        if self.attribute_to_layer_input:
            normalized_relevances = layer.rule.relevance_input
        else:
            normalized_relevances = layer.rule.relevance_output
        key_list = _sort_key_list(list(normalized_relevances.keys()),
                                  self.device_ids)
        normalized_relevances = _reduce_list(
            [normalized_relevances[device_id] for device_id in key_list])

        if isinstance(normalized_relevances, tuple):
            return tuple(normalized_relevance *
                         output.reshape((-1, ) + (1, ) *
                                        (normalized_relevance.dim() - 1))
                         for normalized_relevance in normalized_relevances)
        else:
            return normalized_relevances * output.reshape(
                (-1, ) + (1, ) * (normalized_relevances.dim() - 1))
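The reshape-and-multiply at the end broadcasts a per-example scalar across each example's relevance tensor. A minimal standalone sketch of that broadcasting step (the tensor names and shapes here are illustrative, not Captum's):

import torch

normalized_relevance = torch.rand(4, 3, 8, 8)  # per-example relevances
output = torch.rand(4)                         # one scalar output per example

# Reshaping output to (-1, 1, 1, 1) lets it scale every element of the
# corresponding example's relevance tensor via broadcasting.
scaled = normalized_relevance * output.reshape(
    (-1,) + (1,) * (normalized_relevance.dim() - 1))
assert scaled.shape == normalized_relevance.shape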
Example #2
def _gather_distributed_tensors(
    saved_layer: Dict[device, Tuple[Tensor, ...]],
    device_ids: Union[None, List[int]] = None,
    key_list: Union[None, List[device]] = None,
) -> Tuple[Tensor, ...]:
    r"""
    A helper function to concatenate intermediate layer results stored on
    different devices in `saved_layer`. `saved_layer` is a dictionary that
    contains `device_id` as a key and intermediate layer results (either
    the input or the output of the layer) stored on the device corresponding to
    the key.
    `key_list` is a list of devices in the appropriate order for concatenation;
    if not provided, keys are sorted based on device ids.

    If only one key exists (standard model), key list simply has one element.
    """
    if key_list is None:
        key_list = _sort_key_list(list(saved_layer.keys()), device_ids)
    return _reduce_list([saved_layer[device_id] for device_id in key_list])
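A hedged usage sketch: the module path below is an assumption, and the two CPU tensors standing in for per-device results are illustrative. Passing `key_list` explicitly skips the device-id sort.

import torch
from captum._utils.gradient import _gather_distributed_tensors  # path is an assumption

dev0, dev1 = torch.device("cuda:0"), torch.device("cuda:1")
saved_layer = {
    dev0: (torch.zeros(2, 5),),  # half the batch, captured on device 0
    dev1: (torch.ones(2, 5),),   # the other half, captured on device 1
}
full = _gather_distributed_tensors(saved_layer, key_list=[dev0, dev1])
# full[0] has shape (4, 5): the per-device halves concatenated along dim 0.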
Example #3
def _neuron_gradients(
    inputs: Union[Tensor, Tuple[Tensor, ...]],
    saved_layer: Dict[device, Tuple[Tensor, ...]],
    key_list: List[device],
    gradient_neuron_selector: Union[int, Tuple[Union[int, slice], ...],
                                    Callable],
) -> Tuple[Tensor, ...]:
    with torch.autograd.set_grad_enabled(True):
        gradient_tensors = []
        for key in key_list:
            current_out_tensor = _verify_select_neuron(
                saved_layer[key], gradient_neuron_selector)
            gradient_tensors.append(
                torch.autograd.grad(
                    torch.unbind(current_out_tensor)
                    if current_out_tensor.numel() > 1 else current_out_tensor,
                    inputs,
                ))
        _total_gradients = _reduce_list(gradient_tensors, sum)
    return _total_gradients
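A self-contained sketch of the core pattern above, with illustrative names rather than Captum's: select one neuron's activation per example and differentiate it with respect to the inputs.

import torch

inp = torch.rand(3, 4, requires_grad=True)
layer_out = inp * 2.0            # stand-in for a hooked layer output
neuron = layer_out[:, 1]         # select neuron index 1 for every example
# Unbinding yields one scalar per example, matching the numel > 1 branch above.
grads = torch.autograd.grad(torch.unbind(neuron), inp)
# grads[0] has inp's shape, with nonzero entries only in column 1.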
Example #4
def _batched_operator(operator: Callable[..., TupleOrTensorOrBoolGeneric],
                      inputs: TensorOrTupleOfTensorsGeneric,
                      additional_forward_args: Any = None,
                      target_ind: TargetType = None,
                      internal_batch_size: Union[None, int] = None,
                      **kwargs: Any) -> TupleOrTensorOrBoolGeneric:
    """
    Batches the operation of the given operator, applying the given batch size
    to inputs and additional forward arguments, and returning the concatenation
    of the results of each batch.
    """
    all_outputs = [
        operator(inputs=input,
                 additional_forward_args=additional,
                 target_ind=target,
                 **kwargs)
        for input, additional, target in _batched_generator(
            inputs, additional_forward_args, target_ind, internal_batch_size)
    ]
    return _reduce_list(all_outputs)
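A toy, self-contained version of the same batching idea (the helper name and single-tensor signature are simplifications, not Captum's API): split the input along the batch dimension, apply the operator per chunk, and concatenate the results.

import torch

def batched_apply(operator, inputs, internal_batch_size):
    # Run the operator chunk by chunk and stitch the results back together.
    outputs = [operator(chunk)
               for chunk in torch.split(inputs, internal_batch_size, dim=0)]
    return torch.cat(outputs, dim=0)

big = torch.rand(10, 3)
out = batched_apply(lambda x: x.sum(dim=1, keepdim=True), big,
                    internal_batch_size=4)
assert out.shape == (10, 1)  # chunks of 4, 4, and 2 rows, concatenated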
Example #5
    def _evaluate_batch(
        self,
        input_list: List,
        additional_forward_args: Any,
        correct_fn_kwargs: Optional[Dict[str, Any]],
        target: TargetType,
    ) -> Optional[int]:
        if additional_forward_args is None:
            additional_forward_args = ()

        all_kwargs = {}
        if target is not None:
            all_kwargs["target"] = target
        if correct_fn_kwargs is not None:
            all_kwargs.update(correct_fn_kwargs)

        if len(input_list) == 1:
            model_out = self.forward_func(input_list[0],
                                          *additional_forward_args)
            out_metric = self.correct_fn(model_out, **all_kwargs)
            return 0 if not out_metric else None
        else:
            batched_inps = _reduce_list(input_list)
            model_out = self.forward_func(batched_inps,
                                          *additional_forward_args)
            current_count = 0
            for i in range(len(input_list)):
                batch_size = (input_list[i].shape[0] if isinstance(
                    input_list[i], Tensor) else input_list[i][0].shape[0])
                out_metric = self.correct_fn(
                    model_out[current_count:current_count + batch_size],
                    **all_kwargs)
                if not out_metric:
                    return i
                current_count += batch_size
            return None
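The method batches candidate inputs into one forward pass, then slices the model output per candidate to find the first one that fails `correct_fn`. A stripped-down sketch of that control flow (the toy model and check are assumptions):

import torch

def first_failing(model, candidates, correct_fn):
    out = model(torch.cat(candidates, dim=0))  # one forward pass for all candidates
    count = 0
    for i, cand in enumerate(candidates):
        n = cand.shape[0]
        if not correct_fn(out[count:count + n]):
            return i                           # index of the first failing candidate
        count += n
    return None                                # every candidate passed

model = lambda x: x.sum(dim=1)
cands = [torch.ones(1, 3), -torch.ones(1, 3)]
idx = first_failing(model, cands, lambda o: bool((o > 0).all()))  # idx == 1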
Example #6
def compute_layer_gradients_and_eval(
    forward_fn: Callable,
    layer: ModuleOrModuleList,
    inputs: Union[Tensor, Tuple[Tensor, ...]],
    target_ind: TargetType = None,
    additional_forward_args: Any = None,
    gradient_neuron_selector: Union[
        None, int, Tuple[Union[int, slice], ...], Callable
    ] = None,
    device_ids: Union[None, List[int]] = None,
    attribute_to_layer_input: bool = False,
    output_fn: Union[None, Callable] = None,
) -> Union[
    Tuple[Tuple[Tensor, ...], Tuple[Tensor, ...]],
    Tuple[Tuple[Tensor, ...], Tuple[Tensor, ...], Tuple[Tensor, ...]],
    Tuple[List[Tuple[Tensor, ...]], List[Tuple[Tensor, ...]]],
]:
    r"""
    Computes gradients of the output with respect to a given layer as well
    as the output evaluation of the layer for an arbitrary forward function
    and given input.

    For data parallel models, hooks are executed once per device, so we
    need to internally combine the separated tensors from devices by
    concatenating based on device_ids. Any necessary gradients must be taken
    with respect to each independent batched tensor, so the gradients are
    computed and combined appropriately.

    More information regarding the behavior of forward hooks with DataParallel
    models can be found in the PyTorch data parallel documentation. We maintain
    the separate inputs in a dictionary protected by a lock, analogous to the
    gather implementation for the core PyTorch DataParallel implementation.

    NOTE: To properly handle inplace operations, a clone of the layer output
    is stored. This structure inhibits execution of a backward hook on the last
    module for the layer output when computing the gradient with respect to
    the input, since we store an intermediate clone, as
    opposed to the true module output. If backward module hooks are necessary
    for the final module when computing input gradients, utilize
    _forward_layer_eval_with_neuron_grads instead.

    Args:

        forward_fn: forward function. This can be for example model's
                    forward function.
        layer:      Layer for which gradients / output will be evaluated.
        inputs:     Input at which gradients are evaluated,
                    will be passed to forward_fn.
        target_ind: Index of the target class for which gradients
                    must be computed (classification only).
        output_fn:  An optional function that is applied to the layer inputs or
                    outputs depending on whether `attribute_to_layer_input` is
                    set to `True` or `False`.
        additional_forward_args: Additional input arguments that the forward
                    function requires. Pass an empty tuple if no additional
                    arguments are required.

    Returns:
        2-element tuple of **gradients**, **evals** (a third element, the input
        gradients, is appended when `gradient_neuron_selector` is provided):
        - **gradients**:
            Gradients of output with respect to target layer output.
        - **evals**:
            Target layer output for given input.
    """
    with torch.autograd.set_grad_enabled(True):
        # saved_layer is a dictionary mapping device to a tuple of
        # layer evaluations on that device.
        saved_layer, output = _forward_layer_distributed_eval(
            forward_fn,
            inputs,
            layer,
            target_ind=target_ind,
            additional_forward_args=additional_forward_args,
            attribute_to_layer_input=attribute_to_layer_input,
            forward_hook_with_return=True,
            require_layer_grads=True,
        )
        assert output[0].numel() == 1, (
            "Target not provided when necessary, cannot"
            " take gradient with respect to multiple outputs."
        )

        device_ids = _extract_device_ids(forward_fn, saved_layer, device_ids)

        # Identifies correct device ordering based on device ids.
        # key_list is a list of devices in appropriate ordering for concatenation.
        # If only one key exists (standard model), key list simply has one element.
        key_list = _sort_key_list(
            list(next(iter(saved_layer.values())).keys()), device_ids
        )
        all_outputs: Union[Tuple[Tensor, ...], List[Tuple[Tensor, ...]]]
        if isinstance(layer, Module):
            all_outputs = _reduce_list(
                [
                    saved_layer[layer][device_id]
                    if output_fn is None
                    else output_fn(saved_layer[layer][device_id])
                    for device_id in key_list
                ]
            )
        else:
            all_outputs = [
                _reduce_list(
                    [
                        saved_layer[single_layer][device_id]
                        if output_fn is None
                        else output_fn(saved_layer[single_layer][device_id])
                        for device_id in key_list
                    ]
                )
                for single_layer in layer
            ]
        all_layers: List[Module] = [layer] if isinstance(layer, Module) else layer
        grad_inputs = tuple(
            layer_tensor
            for single_layer in all_layers
            for device_id in key_list
            for layer_tensor in saved_layer[single_layer][device_id]
        )
        saved_grads = torch.autograd.grad(torch.unbind(output), grad_inputs)

        offset = 0
        all_grads: List[Tuple[Tensor, ...]] = []
        for single_layer in all_layers:
            num_tensors = len(next(iter(saved_layer[single_layer].values())))
            curr_saved_grads = [
                saved_grads[i : i + num_tensors]
                for i in range(
                    offset, offset + len(key_list) * num_tensors, num_tensors
                )
            ]
            offset += len(key_list) * num_tensors
            if output_fn is not None:
                curr_saved_grads = [
                    output_fn(curr_saved_grad) for curr_saved_grad in curr_saved_grads
                ]

            all_grads.append(_reduce_list(curr_saved_grads))

        layer_grads: Union[Tuple[Tensor, ...], List[Tuple[Tensor, ...]]]
        layer_grads = all_grads
        if isinstance(layer, Module):
            layer_grads = all_grads[0]

        if gradient_neuron_selector is not None:
            assert isinstance(
                layer, Module
            ), "Cannot compute neuron gradients for multiple layers simultaneously!"
            inp_grads = _neuron_gradients(
                inputs, saved_layer[layer], key_list, gradient_neuron_selector
            )
            return (
                cast(Tuple[Tensor, ...], layer_grads),
                cast(Tuple[Tensor, ...], all_outputs),
                inp_grads,
            )
    return layer_grads, all_outputs  # type: ignore
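A hedged usage sketch: the import path and the call pattern below follow current Captum source, but treat both as assumptions for your installed version.

import torch
import torch.nn as nn
from captum._utils.gradient import compute_layer_gradients_and_eval  # path is an assumption

model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
inp = torch.rand(3, 4)
# Gradients of the target-0 logit w.r.t. the ReLU layer's output, plus the
# layer output itself; each is a 1-tuple of tensors of shape (3, 8).
grads, evals = compute_layer_gradients_and_eval(model, model[1], inp,
                                                target_ind=0)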
Example #7
    def test_reduce_list_tensors(self):
        tensors = [torch.tensor([[3, 4, 5]]), torch.tensor([[0, 1, 2]])]
        reduced = _reduce_list(tensors)
        assertTensorAlmostEqual(self, reduced, [[3, 4, 5], [0, 1, 2]])
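The test pins down the default behavior: tensors are concatenated along dim 0. Inferred from this test and the usage above, a minimal re-implementation sketch of `_reduce_list` (the real helper lives in `captum._utils.common` and handles additional element types):

import torch

def reduce_list_sketch(val_list, red_func=torch.cat):
    # Tensors are reduced directly; by default they are concatenated along dim 0.
    if isinstance(val_list[0], torch.Tensor):
        return red_func(val_list)
    # Tuples are reduced position by position, recursing into each slot, which
    # mirrors how the helpers above gather per-device tuples of layer tensors.
    return tuple(reduce_list_sketch([v[i] for v in val_list], red_func)
                 for i in range(len(val_list[0])))

merged = reduce_list_sketch([torch.tensor([[3, 4, 5]]), torch.tensor([[0, 1, 2]])])
# merged == tensor([[3, 4, 5], [0, 1, 2]])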