def _next_infidelity(current_n_perturb_samples: int) -> Tensor:
    """
    Compute the infidelity contribution of one chunk of
    `current_n_perturb_samples` perturbations per example.

    Returns a tensor of shape (bsz,) holding, per example, the sum over its
    perturbation samples of
    (sum_i <attribution_i, perturbation_i> - (F(x) - F(x_perturbed)))^2.

    NOTE(review): relies on closure variables from the enclosing scope
    (inputs, target, forward_func, attributions, bsz, ...).
    """
    perturbations, inputs_perturbed = _generate_perturbations(
        current_n_perturb_samples
    )
    perturbations = _format_tensor_into_tuples(perturbations)
    inputs_perturbed = _format_tensor_into_tuples(inputs_perturbed)
    _validate_inputs_and_perturbations(
        cast(Tuple[Tensor, ...], inputs),
        cast(Tuple[Tensor, ...], inputs_perturbed),
        cast(Tuple[Tensor, ...], perturbations),
    )
    # Each example is duplicated current_n_perturb_samples times
    # back-to-back, so targets/args expand with repeat_interleave.
    targets_expanded = _expand_target(
        target,
        current_n_perturb_samples,
        expansion_type=ExpansionTypes.repeat_interleave,
    )
    additional_forward_args_expanded = _expand_additional_forward_args(
        additional_forward_args,
        current_n_perturb_samples,
        expansion_type=ExpansionTypes.repeat_interleave,
    )
    inputs_perturbed_fwd = _run_forward(
        forward_func,
        inputs_perturbed,
        targets_expanded,
        additional_forward_args_expanded,
    )
    inputs_fwd = _run_forward(forward_func, inputs, target, additional_forward_args)
    # Align unperturbed outputs with the expanded perturbed batch.
    inputs_fwd = torch.repeat_interleave(
        inputs_fwd, current_n_perturb_samples, dim=0
    )
    inputs_minus_perturb = inputs_fwd - inputs_perturbed_fwd
    attributions_expanded = tuple(
        torch.repeat_interleave(attribution, current_n_perturb_samples, dim=0)
        for attribution in attributions
    )
    attributions_times_perturb = tuple(
        (attribution_expanded * perturbation).view(attribution_expanded.size(0), -1)
        for attribution_expanded, perturbation in zip(
            attributions_expanded, perturbations
        )
    )
    # Sum the per-tensor dot products lazily with a generator instead of
    # materializing an intermediate list (consistent with
    # _next_infidelity_tensors).
    attribution_times_perturb_sums = sum(
        torch.sum(attribution_times_perturb, dim=1)
        for attribution_times_perturb in attributions_times_perturb
    )
    return torch.sum(
        torch.pow(
            attribution_times_perturb_sums - inputs_minus_perturb.view(-1), 2
        ).view(bsz, -1),
        dim=1,
    )
def _expand_inputs_baselines_targets(
    self,
    baselines: Tuple[Tensor, ...],
    inputs: Tuple[Tensor, ...],
    target: TargetType,
    additional_forward_args: Any,
) -> Tuple[Tuple[Tensor, ...], Tuple[Tensor, ...], TargetType, Any]:
    """
    Pair every input row with every baseline row by expanding both batches,
    and expand target / additional forward args to match the enlarged batch.
    """
    inp_bsz = inputs[0].shape[0]
    base_bsz = baselines[0].shape[0]

    # Each input row is repeated base_bsz times consecutively
    # (repeat_interleave); gradients are enabled on the copies.
    expanded_inputs = tuple(
        inp.repeat_interleave(base_bsz, dim=0).requires_grad_() for inp in inputs
    )

    # The whole baseline batch is tiled inp_bsz times along dim 0;
    # trailing dimensions are left untouched.
    expanded_baselines = tuple(
        ref.repeat((inp_bsz,) + (1,) * (len(ref.shape) - 1)).requires_grad_()
        for ref in baselines
    )

    expanded_target = _expand_target(
        target, base_bsz, expansion_type=ExpansionTypes.repeat_interleave
    )

    if additional_forward_args is None:
        input_additional_args = None
    else:
        input_additional_args = _expand_additional_forward_args(
            additional_forward_args,
            base_bsz,
            expansion_type=ExpansionTypes.repeat_interleave,
        )

    return (
        expanded_inputs,
        expanded_baselines,
        expanded_target,
        input_additional_args,
    )
def _linear_search(
    self,
    inputs: Any,
    preproc_input: Any,
    attack_kwargs: Optional[Dict[str, Any]],
    additional_forward_args: Any,
    expanded_additional_args: Any,
    correct_fn_kwargs: Optional[Dict[str, Any]],
    target: TargetType,
    perturbations_per_eval: int,
) -> Tuple[Any, Optional[Union[int, float]]]:
    """
    Scan attack parameters from self.arg_min to self.arg_max in steps of
    self.arg_step (ascending), attacking self.num_attempts times per value,
    and return the first (attacked_input, param) whose batch evaluation
    reports a successful misclassification. Returns (None, None) when no
    parameter in the range succeeds.
    """
    # Pending perturbed inputs / their params, batched up to
    # perturbations_per_eval before each forward evaluation.
    input_list = []
    attack_inp_list = []
    param_list = []

    for param in drange(self.arg_min, self.arg_max, self.arg_step):
        for _ in range(self.num_attempts):
            preproc_attacked_inp, attacked_inp = self._apply_attack(
                inputs, preproc_input, attack_kwargs, param)
            input_list.append(preproc_attacked_inp)
            param_list.append(param)
            attack_inp_list.append(attacked_inp)

            if len(input_list) == perturbations_per_eval:
                # Full batch: evaluate, and since params grow monotonically,
                # the first success within the batch is the minimal one.
                successful_ind = self._evaluate_batch(
                    input_list,
                    expanded_additional_args,
                    correct_fn_kwargs,
                    target,
                )
                if successful_ind is not None:
                    return (
                        attack_inp_list[successful_ind],
                        param_list[successful_ind],
                    )
                input_list = []
                param_list = []
                attack_inp_list = []
    if len(input_list) > 0:
        # Leftover partial batch: re-expand additional args to its
        # (smaller) size before evaluating.
        final_add_args = _expand_additional_forward_args(
            additional_forward_args, len(input_list))
        successful_ind = self._evaluate_batch(
            input_list,
            final_add_args,
            correct_fn_kwargs,
            target,
        )
        if successful_ind is not None:
            return (
                attack_inp_list[successful_ind],
                param_list[successful_ind],
            )
    return None, None
def pre_hook(module: Module, baseline_inputs_add_args: Tuple) -> Tuple:
    """
    Forward-pre-hook that concatenates each baseline batch under its
    corresponding input batch (along dim 0) and, when additional forward
    args are present, duplicates them so both halves of the doubled batch
    receive them.
    """
    inputs = baseline_inputs_add_args[0]
    baselines = baseline_inputs_add_args[1]
    extra = (
        baseline_inputs_add_args[2:]
        if len(baseline_inputs_add_args) > 2
        else None
    )

    stacked = tuple(
        torch.cat([inp, ref]) for inp, ref in zip(inputs, baselines)
    )

    if extra is None:
        return stacked

    # Repeat (not interleave) the extra args twice to line up with the
    # [inputs; baselines] concatenation above.
    doubled_extra = cast(
        Tuple,
        _expand_additional_forward_args(extra, 2, ExpansionTypes.repeat),
    )
    return (*stacked, *doubled_extra)
def _next_infidelity_tensors(
    current_n_perturb_samples: int,
) -> Union[Tuple[Tensor], Tuple[Tensor, Tensor, Tensor]]:
    """
    Compute infidelity aggregates for one chunk of
    `current_n_perturb_samples` perturbations per example.

    With a := sum_i <attribution_i, perturbation_i> and
    b := F(x) - F(x_perturbed), returns, per example:
      - normalize=False: a 1-tuple ((a - b)^2 summed over samples,)
      - normalize=True: the 3-tuple (sum a^2, sum a*b, sum b^2) needed to
        assemble the normalized MSE across chunks.

    NOTE(review): closure over enclosing scope (inputs, target,
    forward_func, attributions, bsz, normalize, ...).
    """
    perturbations, inputs_perturbed = _generate_perturbations(
        current_n_perturb_samples
    )
    perturbations = _format_tensor_into_tuples(perturbations)
    inputs_perturbed = _format_tensor_into_tuples(inputs_perturbed)
    _validate_inputs_and_perturbations(
        cast(Tuple[Tensor, ...], inputs),
        cast(Tuple[Tensor, ...], inputs_perturbed),
        cast(Tuple[Tensor, ...], perturbations),
    )
    # Each example appears current_n_perturb_samples times consecutively,
    # hence repeat_interleave expansion for targets and args.
    targets_expanded = _expand_target(
        target,
        current_n_perturb_samples,
        expansion_type=ExpansionTypes.repeat_interleave,
    )
    additional_forward_args_expanded = _expand_additional_forward_args(
        additional_forward_args,
        current_n_perturb_samples,
        expansion_type=ExpansionTypes.repeat_interleave,
    )
    inputs_perturbed_fwd = _run_forward(
        forward_func,
        inputs_perturbed,
        targets_expanded,
        additional_forward_args_expanded,
    )
    inputs_fwd = _run_forward(forward_func, inputs, target, additional_forward_args)
    # Align unperturbed outputs with the expanded perturbed batch.
    inputs_fwd = torch.repeat_interleave(
        inputs_fwd, current_n_perturb_samples, dim=0
    )
    perturbed_fwd_diffs = inputs_fwd - inputs_perturbed_fwd
    attributions_expanded = tuple(
        torch.repeat_interleave(attribution, current_n_perturb_samples, dim=0)
        for attribution in attributions
    )
    # Flatten each (attribution * perturbation) product so a single sum
    # over dim=1 gives the per-sample dot product.
    attributions_times_perturb = tuple(
        (attribution_expanded * perturbation).view(attribution_expanded.size(0), -1)
        for attribution_expanded, perturbation in zip(
            attributions_expanded, perturbations
        )
    )
    # Accumulate dot products across the input-tuple tensors.
    attr_times_perturb_sums = sum(
        torch.sum(attribution_times_perturb, dim=1)
        for attribution_times_perturb in attributions_times_perturb
    )
    attr_times_perturb_sums = cast(Tensor, attr_times_perturb_sums)

    # reshape as Tensor(bsz, current_n_perturb_samples)
    attr_times_perturb_sums = attr_times_perturb_sums.view(bsz, -1)
    perturbed_fwd_diffs = perturbed_fwd_diffs.view(bsz, -1)

    if normalize:
        # in order to normalize, we have to aggregate the following tensors
        # to calculate MSE in its polynomial expansion:
        # (a-b)^2 = a^2 - 2ab + b^2
        return (
            attr_times_perturb_sums.pow(2).sum(-1),
            (attr_times_perturb_sums * perturbed_fwd_diffs).sum(-1),
            perturbed_fwd_diffs.pow(2).sum(-1),
        )
    else:
        # returns (a-b)^2 if no need to normalize
        return ((attr_times_perturb_sums - perturbed_fwd_diffs).pow(2).sum(-1),)
def _attribute(
    self,
    inputs: Tuple[Tensor, ...],
    neuron_selector: Union[int, Tuple[int, ...], Callable],
    baselines: Tuple[Union[Tensor, int, float], ...],
    target: TargetType = None,
    additional_forward_args: Any = None,
    n_steps: int = 50,
    method: str = "riemann_trapezoid",
    attribute_to_neuron_input: bool = False,
    step_sizes_and_alphas: Union[None, Tuple[List[float], List[float]]] = None,
) -> Tuple[Tensor, ...]:
    """
    Neuron-conductance core: integrates, along the baseline->input path,
    the input gradients scaled by the selected neuron's gradient (chain
    rule), yielding one attribution tensor per input tensor with the same
    shape as that input.
    """
    num_examples = inputs[0].shape[0]
    # Stacked batch size after expanding one slab per integration step.
    total_batch = num_examples * n_steps

    if step_sizes_and_alphas is None:
        # retrieve step size and scaling factor for specified approximation method
        step_sizes_func, alphas_func = approximation_parameters(method)
        step_sizes, alphas = step_sizes_func(n_steps), alphas_func(n_steps)
    else:
        step_sizes, alphas = step_sizes_and_alphas

    # Compute scaled inputs from baseline to final input.
    scaled_features_tpl = tuple(
        torch.cat(
            [baseline + alpha * (input - baseline) for alpha in alphas],
            dim=0).requires_grad_()
        for input, baseline in zip(inputs, baselines))

    additional_forward_args = _format_additional_forward_args(
        additional_forward_args)
    # apply number of steps to additional forward args
    # currently, number of steps is applied only to additional forward arguments
    # that are nd-tensors. It is assumed that the first dimension is
    # the number of batches.
    # dim -> (#examples * #steps x additional_forward_args[0].shape[1:], ...)
    input_additional_args = (_expand_additional_forward_args(
        additional_forward_args, n_steps)
        if additional_forward_args is not None else None)
    expanded_target = _expand_target(target, n_steps)

    # Conductance Gradients - Returns gradient of output with respect to
    # hidden layer and hidden layer evaluated at each input.
    layer_gradients, layer_eval, input_grads = compute_layer_gradients_and_eval(
        forward_fn=self.forward_func,
        layer=self.layer,
        inputs=scaled_features_tpl,
        target_ind=expanded_target,
        additional_forward_args=input_additional_args,
        gradient_neuron_selector=neuron_selector,
        device_ids=self.device_ids,
        attribute_to_layer_input=attribute_to_neuron_input,
    )
    # Gradient of the output w.r.t. the single selected neuron.
    mid_grads = _verify_select_neuron(layer_gradients, neuron_selector)
    # Chain rule: scale each input gradient by the neuron gradient,
    # broadcasting over all non-batch dimensions.
    scaled_input_gradients = tuple(
        input_grad
        * mid_grads.reshape((total_batch, ) + (1, ) * (len(input_grad.shape) - 1))
        for input_grad in input_grads)

    # Multiplies by appropriate step size.
    scaled_grads = tuple(
        scaled_input_gradient.contiguous().view(n_steps, -1)
        * torch.tensor(step_sizes).view(n_steps, 1).to(
            scaled_input_gradient.device)
        for scaled_input_gradient in scaled_input_gradients)

    # Aggregates across all steps for each tensor in the input tuple
    total_grads = tuple(
        _reshape_and_sum(scaled_grad, n_steps, num_examples,
                         input_grad.shape[1:])
        for (scaled_grad, input_grad) in zip(scaled_grads, input_grads))

    if self.multiplies_by_inputs:
        # computes attribution for each tensor in input tuple
        # attributions has the same dimensionality as inputs
        attributions = tuple(
            total_grad * (input - baseline)
            for total_grad, input, baseline in zip(
                total_grads, inputs, baselines))
    else:
        attributions = total_grads
    return attributions
def _attribute(
    self,
    inputs: Tuple[Tensor, ...],
    baselines: Tuple[Union[Tensor, int, float], ...],
    target: TargetType = None,
    additional_forward_args: Any = None,
    n_steps: int = 50,
    method: str = "gausslegendre",
    attribute_to_layer_input: bool = False,
    step_sizes_and_alphas: Union[None, Tuple[List[float], List[float]]] = None,
) -> Union[Tensor, Tuple[Tensor, ...]]:
    """
    Integrate the gradients of the output w.r.t. the chosen hidden layer
    along the straight-line path from baselines to inputs, using the
    quadrature rule selected by `method`.
    """
    if step_sizes_and_alphas is not None:
        step_sizes, alphas = step_sizes_and_alphas
    else:
        # Quadrature step sizes and interpolation points for `method`.
        step_sizes_func, alphas_func = approximation_parameters(method)
        step_sizes = step_sizes_func(n_steps)
        alphas = alphas_func(n_steps)

    # Interpolate baseline -> input at every alpha; one slab of the batch
    # per alpha, stacked along dim 0, with gradients enabled.
    scaled_features_tpl = tuple(
        torch.cat(
            [ref + a * (inp - ref) for a in alphas], dim=0
        ).requires_grad_()
        for inp, ref in zip(inputs, baselines)
    )

    additional_forward_args = _format_additional_forward_args(
        additional_forward_args
    )
    # Tensor additional args are tiled once per integration step; their
    # first dimension is assumed to be the batch dimension.
    input_additional_args = (
        None
        if additional_forward_args is None
        else _expand_additional_forward_args(additional_forward_args, n_steps)
    )
    expanded_target = _expand_target(target, n_steps)

    # Gradient of the target output with respect to the hidden layer.
    layer_gradients, _ = compute_layer_gradients_and_eval(
        forward_fn=self.forward_func,
        layer=self.layer,
        inputs=scaled_features_tpl,
        target_ind=expanded_target,
        additional_forward_args=input_additional_args,
        device_ids=self.device_ids,
        attribute_to_layer_input=attribute_to_layer_input,
    )

    # Flatten per-step gradients and weight each step by its quadrature
    # step size; contiguous() guards against non-contiguous gradients.
    weighted_grads = []
    for grad in layer_gradients:
        step_weights = torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
        weighted_grads.append(grad.contiguous().view(n_steps, -1) * step_weights)

    # Sum over steps, restoring each tensor's layer shape.
    bsz = inputs[0].shape[0]
    attrs = tuple(
        _reshape_and_sum(weighted, n_steps, bsz, grad.shape[1:])
        for weighted, grad in zip(weighted_grads, layer_gradients)
    )
    return _format_output(len(attrs) > 1, attrs)
def evaluate(
    self,
    inputs: Any,
    additional_forward_args: Optional[Tuple] = None,
    target: TargetType = None,
    perturbations_per_eval: int = 1,
    attack_kwargs: Optional[Dict[str, Any]] = None,
    correct_fn_kwargs: Optional[Dict[str, Any]] = None,
) -> Tuple[Any, Optional[Union[int, float]]]:
    r"""
    This method evaluates the model at each perturbed input and identifies
    the minimum perturbation that leads to an incorrect model prediction.

    It is recommended to provide a single input (batch size = 1) when using
    this to identify a minimal perturbation for the chosen example. If a
    batch of examples is provided, the default correct function identifies
    the minimal perturbation for at least 1 example in the batch to be
    misclassified. A custom correct_fn can be provided to customize this
    behavior and define correctness for the batch.

    Args:

        inputs (Any): Input for which minimal perturbation is computed.
            It can be provided as a tensor, tuple of tensors, or any raw
            input type (e.g. PIL image or text string). This input is
            provided directly as input to preproc function as well as any
            attack applied before preprocessing. If no pre-processing
            function is provided, this input is provided directly to the
            main model and all attacks.
        additional_forward_args (any, optional): If the forward function
            requires additional arguments other than the preprocessing
            outputs (or inputs if preproc_fn is None), this argument can be
            provided. It must be either a single additional argument of a
            Tensor or arbitrary (non-tuple) type or a tuple containing
            multiple additional arguments including tensors or any
            arbitrary python types. These arguments are provided to
            forward_func in order following the arguments in inputs.
            For a tensor, the first dimension of the tensor must correspond
            to the number of examples. For all other types, the given
            argument is used for all forward evaluations.
            Default: None
        target (TargetType): Target class for classification. This is
            required if using the default correct_fn
        perturbations_per_eval (int, optional): Allows perturbations of
            multiple attacks to be grouped and evaluated in one call of
            forward_fn. Each forward pass will contain a maximum of
            perturbations_per_eval * #examples samples. For DataParallel
            models, each batch is split among the available devices, so
            evaluations on each available device contain at most
            (perturbations_per_eval * #examples) / num_devices samples.
            In order to apply this functionality, the output of preproc_fn
            (or inputs itself if no preproc_fn is provided) must be a
            tensor or tuple of tensors.
            Default: 1
        attack_kwargs (dictionary, optional): Optional dictionary of
            keyword arguments provided to attack function
        correct_fn_kwargs (dictionary, optional): Optional dictionary of
            keyword arguments provided to correct function

    Returns:

        Tuple of (perturbed_inputs, param_val) if successful
        else Tuple of (None, None)

        - **perturbed inputs** (Any):
                Perturbed input (output of attack) which results in
                incorrect prediction.
        - **param_val** (int, float):
                Param value leading to perturbed inputs causing
                misclassification

    Examples::

    >>> def gaussian_noise(inp: Tensor, std: float) -> Tensor:
    >>>     return inp + std*torch.randn_like(inp)

    >>> min_pert = MinParamPerturbation(forward_func=resnet18,
                                       attack=gaussian_noise,
                                       arg_name="std",
                                       arg_min=0.0,
                                       arg_max=2.0,
                                       arg_step=0.01,
                                    )
    >>> for images, labels in dataloader:
    >>>     noised_image, min_std = min_pert.evaluate(inputs=images, target=labels)
    """
    additional_forward_args = _format_additional_forward_args(
        additional_forward_args)
    # Pre-expand args for full-size batches; partial batches are
    # re-expanded by the search helpers.
    expanded_additional_args = (_expand_additional_forward_args(
        additional_forward_args, perturbations_per_eval)
        if perturbations_per_eval > 1 else additional_forward_args)

    preproc_input = inputs if not self.preproc_fn else self.preproc_fn(
        inputs)

    # Dispatch to the search strategy configured on this instance.
    if self.mode is MinParamPerturbationMode.LINEAR:
        search_fn = self._linear_search
    elif self.mode is MinParamPerturbationMode.BINARY:
        search_fn = self._binary_search
    else:
        raise NotImplementedError(
            "Chosen MinParamPerturbationMode is not supported!")
    return search_fn(
        inputs,
        preproc_input,
        attack_kwargs,
        additional_forward_args,
        expanded_additional_args,
        correct_fn_kwargs,
        target,
        perturbations_per_eval,
    )
def _binary_search(
    self,
    inputs: Any,
    preproc_input: Any,
    attack_kwargs: Optional[Dict[str, Any]],
    additional_forward_args: Any,
    expanded_additional_args: Any,
    correct_fn_kwargs: Optional[Dict[str, Any]],
    target: TargetType,
    perturbations_per_eval: int,
) -> Tuple[Any, Optional[Union[int, float]]]:
    """
    Binary-search the attack parameter range [self.arg_min, self.arg_max]
    (quantized to multiples of self.arg_step) for the smallest value whose
    attack causes a misclassification. Assumes attack success is monotone
    in the parameter. Returns (attacked_input, param) for the smallest
    successful value found, or (None, None).
    """
    min_range = self.arg_min
    max_range = self.arg_max
    # Best (smallest) successful parameter and its attacked input so far.
    min_so_far = None
    min_input = None
    while max_range > min_range:
        # Midpoint expressed as a whole number of arg_steps so `mid`
        # stays on the parameter grid; force at least one step forward
        # when the interval still spans more than a single step.
        mid_step = ((max_range - min_range) // self.arg_step) // 2
        if mid_step == 0 and min_range + self.arg_step < max_range:
            mid_step = 1
        mid = min_range + (mid_step * self.arg_step)

        input_list = []
        param_list = []
        attack_inp_list = []
        attack_success = False

        for i in range(self.num_attempts):
            preproc_attacked_inp, attacked_inp = self._apply_attack(
                inputs, preproc_input, attack_kwargs, mid)
            input_list.append(preproc_attacked_inp)
            param_list.append(mid)
            attack_inp_list.append(attacked_inp)
            # Evaluate on a full batch, or on the final (possibly partial)
            # batch of attempts for this midpoint.
            if len(input_list) == perturbations_per_eval or i == (
                    self.num_attempts - 1):
                additional_args = expanded_additional_args
                if len(input_list) != perturbations_per_eval:
                    # Partial batch: re-expand args to its actual size.
                    additional_args = _expand_additional_forward_args(
                        additional_forward_args, len(input_list))
                successful_ind = self._evaluate_batch(
                    input_list,
                    additional_args,
                    correct_fn_kwargs,
                    target,
                )
                if successful_ind is not None:
                    # Attack worked at `mid`: shrink the upper bound and
                    # record the best result; remaining attempts at this
                    # midpoint are unnecessary.
                    attack_success = True
                    max_range = mid
                    if min_so_far is None or min_so_far > mid:
                        min_so_far = mid
                        min_input = attack_inp_list[successful_ind]
                    break
                input_list = []
                param_list = []
                attack_inp_list = []

        # Interval collapsed onto the lower bound (float-safe comparison);
        # no further refinement possible.
        if math.isclose(min_range, mid):
            break
        if not attack_success:
            # Attack failed at `mid`: the minimal parameter is above it.
            min_range = mid
    return min_input, min_so_far
def _ablation_generator(
    self,
    i,
    inputs,
    additional_args,
    target,
    baselines,
    input_mask,
    perturbations_per_eval,
    **kwargs
):
    """
    This method is a generator which yields each perturbation to be evaluated
    including inputs, additional_forward_args, targets, and mask.

    Ablates features of inputs[i] (as selected by input_mask) in groups of
    up to perturbations_per_eval per yielded batch, replacing them with the
    corresponding baseline. The other tensors in `inputs` are passed through
    (repeated to match the enlarged batch).
    """
    extra_args = {}
    for key, value in kwargs.items():
        # For any tuple argument in kwargs, we choose index i of the tuple.
        if isinstance(value, tuple):
            extra_args[key] = value[i]
        else:
            extra_args[key] = value

    input_mask = input_mask[i] if input_mask is not None else None
    min_feature, num_features, input_mask = self._get_feature_range_and_mask(
        inputs[i], input_mask, **extra_args
    )
    num_examples = inputs[0].shape[0]
    # Never batch more perturbations than there are features to ablate.
    perturbations_per_eval = min(perturbations_per_eval, num_features)
    baseline = baselines[i] if isinstance(baselines, tuple) else baselines
    if isinstance(baseline, torch.Tensor):
        # Add a leading dim so the baseline broadcasts against the
        # (num_ablations, num_examples, ...) reshaped input below.
        baseline = baseline.reshape((1,) + baseline.shape)

    if perturbations_per_eval > 1:
        # Repeat features and additional args for batch size.
        all_features_repeated = [
            torch.cat([inputs[j]] * perturbations_per_eval, dim=0)
            for j in range(len(inputs))
        ]
        additional_args_repeated = (
            _expand_additional_forward_args(additional_args, perturbations_per_eval)
            if additional_args is not None
            else None
        )
        target_repeated = _expand_target(target, perturbations_per_eval)
    else:
        all_features_repeated = list(inputs)
        additional_args_repeated = additional_args
        target_repeated = target

    num_features_processed = min_feature
    while num_features_processed < num_features:
        current_num_ablated_features = min(
            perturbations_per_eval, num_features - num_features_processed
        )

        # Store appropriate inputs and additional args based on batch size.
        if current_num_ablated_features != perturbations_per_eval:
            # Final partial batch: slice the repeated tensors down and
            # re-expand args/target to the smaller size.
            current_features = [
                feature_repeated[0 : current_num_ablated_features * num_examples]
                for feature_repeated in all_features_repeated
            ]
            current_additional_args = (
                _expand_additional_forward_args(
                    additional_args, current_num_ablated_features
                )
                if additional_args is not None
                else None
            )
            current_target = _expand_target(target, current_num_ablated_features)
        else:
            current_features = all_features_repeated
            current_additional_args = additional_args_repeated
            current_target = target_repeated

        # Store existing tensor before modifying
        original_tensor = current_features[i]
        # Construct ablated batch for features in range num_features_processed
        # to num_features_processed + current_num_ablated_features and return
        # mask with same size as ablated batch. ablated_features has dimension
        # (current_num_ablated_features, num_examples, inputs[i].shape[1:])
        # Note that in the case of sparse tensors, the second dimension
        # may not necessarily be num_examples and will match the first
        # dimension of this tensor.
        current_reshaped = current_features[i].reshape(
            (current_num_ablated_features, -1) + current_features[i].shape[1:]
        )

        ablated_features, current_mask = self._construct_ablated_input(
            current_reshaped,
            input_mask,
            baseline,
            num_features_processed,
            num_features_processed + current_num_ablated_features,
            **extra_args
        )

        # current_features[i] has dimension
        # (current_num_ablated_features * num_examples, inputs[i].shape[1:]),
        # which can be provided to the model as input.
        current_features[i] = ablated_features.reshape(
            (-1,) + ablated_features.shape[2:]
        )
        yield tuple(
            current_features
        ), current_additional_args, current_target, current_mask
        # Replace existing tensor at index i.
        current_features[i] = original_tensor
        num_features_processed += current_num_ablated_features
def _attribute(
    self,
    inputs: Tuple[Tensor, ...],
    baselines: Tuple[Union[Tensor, int, float], ...],
    target: TargetType = None,
    additional_forward_args: Any = None,
    n_steps: int = 50,
    method: str = "gausslegendre",
    step_sizes_and_alphas: Union[None, Tuple[List[float], List[float]]] = None,
) -> Tuple[Tensor, ...]:
    """
    Core integrated-gradients computation: approximate the path integral of
    input gradients from baselines to inputs with the quadrature rule chosen
    by `method`, optionally multiplying by (input - baseline).
    """
    if step_sizes_and_alphas is not None:
        step_sizes, alphas = step_sizes_and_alphas
    else:
        # Quadrature step sizes and interpolation points for `method`.
        step_sizes_func, alphas_func = approximation_parameters(method)
        step_sizes = step_sizes_func(n_steps)
        alphas = alphas_func(n_steps)

    # Interpolate baseline -> input at every alpha; slabs stacked along
    # dim 0 give shape (bsz * n_steps, *input.shape[1:]), grads enabled.
    scaled_features_tpl = tuple(
        torch.cat(
            [ref + a * (inp - ref) for a in alphas], dim=0
        ).requires_grad_()
        for inp, ref in zip(inputs, baselines)
    )

    additional_forward_args = _format_additional_forward_args(
        additional_forward_args
    )
    # Tensor additional args are tiled once per integration step; their
    # first dimension is assumed to be the batch dimension.
    input_additional_args = (
        None
        if additional_forward_args is None
        else _expand_additional_forward_args(additional_forward_args, n_steps)
    )
    expanded_target = _expand_target(target, n_steps)

    # grads: one tensor per input, shape (bsz * n_steps, *input.shape[1:]).
    grads = self.gradient_func(
        forward_fn=self.forward_func,
        inputs=scaled_features_tpl,
        target_ind=expanded_target,
        additional_forward_args=input_additional_args,
    )

    # Weight each step's flattened gradient by its quadrature step size;
    # contiguous() guards against non-contiguous gradient memory.
    scaled_grads = [
        g.contiguous().view(n_steps, -1)
        * torch.tensor(step_sizes).view(n_steps, 1).to(g.device)
        for g in grads
    ]

    # Sum across steps; total_grads matches each input's dimensionality.
    total_grads = tuple(
        _reshape_and_sum(sg, n_steps, g.shape[0] // n_steps, g.shape[1:])
        for sg, g in zip(scaled_grads, grads)
    )

    if self.multiplies_by_inputs:
        # Complete the IG formula: (input - baseline) * integrated gradient.
        return tuple(
            tg * (inp - ref)
            for tg, inp, ref in zip(total_grads, inputs, baselines)
        )
    return total_grads
def _perturbation_generator(
    self,
    inputs: Tuple[Tensor, ...],
    additional_args: Any,
    target: TargetType,
    baselines: Tuple[Tensor, ...],
    input_masks: TensorOrTupleOfTensorsGeneric,
    feature_permutation: Sequence[int],
    perturbations_per_eval: int,
) -> Iterable[Tuple[Tuple[Tensor, ...], Any, TargetType, Tuple[Tensor, ...]]]:
    """
    This method is a generator which yields each perturbation to be evaluated
    including inputs, additional_forward_args, targets, and mask.

    Starting from the baselines, features are switched to their input values
    one feature-group at a time in the order given by feature_permutation;
    consecutive states are batched together in groups of
    perturbations_per_eval.
    """
    # current_tensors starts at baselines and includes each additional feature as
    # added based on the permutation order.
    current_tensors = baselines
    current_tensors_list = []
    current_mask_list = []

    # Compute repeated additional args and targets
    additional_args_repeated = (
        _expand_additional_forward_args(additional_args, perturbations_per_eval)
        if additional_args is not None
        else None
    )
    target_repeated = _expand_target(target, perturbations_per_eval)
    for i in range(len(feature_permutation)):
        # Keep prior values where mask differs, take the input values for
        # the feature group being added in this step.
        current_tensors = tuple(
            current * (~(mask == feature_permutation[i])).to(current.dtype)
            + input * (mask == feature_permutation[i]).to(input.dtype)
            for input, current, mask in zip(inputs, current_tensors, input_masks)
        )
        current_tensors_list.append(current_tensors)
        current_mask_list.append(
            tuple(mask == feature_permutation[i] for mask in input_masks)
        )
        if len(current_tensors_list) == perturbations_per_eval:
            # Full batch: concatenate the accumulated states along dim 0
            # and stack their masks.
            combined_inputs = tuple(
                torch.cat(aligned_tensors, dim=0)
                for aligned_tensors in zip(*current_tensors_list)
            )
            combined_masks = tuple(
                torch.stack(aligned_masks, dim=0)
                for aligned_masks in zip(*current_mask_list)
            )
            yield (
                combined_inputs,
                additional_args_repeated,
                target_repeated,
                combined_masks,
            )
            current_tensors_list = []
            current_mask_list = []

    # Create batch with remaining evaluations, may not be a complete batch
    # (= perturbations_per_eval)
    if len(current_tensors_list) != 0:
        additional_args_repeated = (
            _expand_additional_forward_args(
                additional_args, len(current_tensors_list)
            )
            if additional_args is not None
            else None
        )
        target_repeated = _expand_target(target, len(current_tensors_list))
        combined_inputs = tuple(
            torch.cat(aligned_tensors, dim=0)
            for aligned_tensors in zip(*current_tensors_list)
        )
        combined_masks = tuple(
            torch.stack(aligned_masks, dim=0)
            for aligned_masks in zip(*current_mask_list)
        )
        yield (
            combined_inputs,
            additional_args_repeated,
            target_repeated,
            combined_masks,
        )
def evaluate(
    self,
    inputs: Any,
    additional_forward_args: Any = None,
    perturbations_per_eval: int = 1,
    **kwargs,
) -> Dict[str, Union[MetricResultType, Dict[str, MetricResultType]]]:
    r"""
    Evaluate model and attack performance on provided inputs

    Args:

        inputs (any): Input for which attack metrics
            are computed. It can be provided as a tensor, tuple of tensors,
            or any raw input type (e.g. PIL image or text string).
            This input is provided directly as input to preproc function as
            well as any attack applied before preprocessing. If no
            pre-processing function is provided, this input is provided
            directly to the main model and all attacks.
        additional_forward_args (any, optional): If the forward function
            requires additional arguments other than the preprocessing
            outputs (or inputs if preproc_fn is None), this argument can be
            provided. It must be either a single additional argument of a
            Tensor or arbitrary (non-tuple) type or a tuple containing
            multiple additional arguments including tensors or any
            arbitrary python types. These arguments are provided to
            forward_func in order following the arguments in inputs.
            For a tensor, the first dimension of the tensor must correspond
            to the number of examples. For all other types, the given
            argument is used for all forward evaluations.
            Default: None
        perturbations_per_eval (int, optional): Allows perturbations of
            multiple attacks to be grouped and evaluated in one call of
            forward_fn. Each forward pass will contain a maximum of
            perturbations_per_eval * #examples samples. For DataParallel
            models, each batch is split among the available devices, so
            evaluations on each available device contain at most
            (perturbations_per_eval * #examples) / num_devices samples.
            In order to apply this functionality, the output of preproc_fn
            (or inputs itself if no preproc_fn is provided) must be a
            tensor or tuple of tensors.
            Default: 1
        kwargs (any, optional): Additional keyword arguments provided to
            metric function as well as selected attacks based on chosen
            additional_args

    Returns:

        - **attack results** Dict: str -> Dict[str, Union[Tensor,
            Tuple[Tensor, ...]]]:
            Dictionary containing attack results for provided batch.
            Maps attack name to dictionary, containing best-case,
            worst-case and average-case results for attack.
            Dictionary contains keys "mean", "max" and "min" when
            num_attempts > 1 and only "mean" for num_attempts = 1,
            which contains the (single) metric result for the attack
            attempt. An additional key of 'Original' is included with
            metric results without any perturbations.

    Examples::

    >>> def accuracy_metric(model_out: Tensor, targets: Tensor):
    >>>     return (torch.argmax(model_out, dim=1) == targets).float()

    >>> attack_metric = AttackComparator(model=resnet18,
                                         metric=accuracy_metric,
                                         preproc_fn=normalize)

    >>> random_rotation = transforms.RandomRotation()
    >>> jitter = transforms.ColorJitter()

    >>> attack_metric.add_attack(random_rotation, "Random Rotation",
    >>>                          num_attempts = 5)
    >>> attack_metric.add_attack(jitter, "Jitter", num_attempts = 1)
    >>> attack_metric.add_attack(FGSM(resnet18), "FGSM 0.1", num_attempts = 1,
    >>>                          apply_before_preproc=False,
    >>>                          attack_kwargs={epsilon: 0.1},
    >>>                          additional_args=["targets"])

    >>> for images, labels in dataloader:
    >>>     batch_results = attack_metric.evaluate(inputs=images, targets=labels)
    """
    additional_forward_args = _format_additional_forward_args(
        additional_forward_args
    )
    # Pre-expand args for full-size batches; partial batches are
    # re-expanded at the end.
    expanded_additional_args = (
        _expand_additional_forward_args(
            additional_forward_args, perturbations_per_eval
        )
        if perturbations_per_eval > 1
        else additional_forward_args
    )

    preproc_input = None
    if self.preproc_fn is not None:
        preproc_input = self.preproc_fn(inputs)
    else:
        preproc_input = inputs

    # The unperturbed input is always evaluated first under ORIGINAL_KEY.
    input_list = [preproc_input]
    key_list = [ORIGINAL_KEY]

    batch_summarizers = {ORIGINAL_KEY: Summarizer([Mean()])}
    if ORIGINAL_KEY not in self.summary_results:
        self.summary_results[ORIGINAL_KEY] = Summarizer(
            [stat() for stat in self.aggregate_stats]
        )

    def _check_and_evaluate(input_list, key_list):
        # Flush the pending batch once it reaches perturbations_per_eval;
        # returns the (possibly emptied) pending lists.
        if len(input_list) == perturbations_per_eval:
            self._evaluate_batch(
                input_list,
                expanded_additional_args,
                key_list,
                batch_summarizers,
                kwargs,
            )
            return [], []
        return input_list, key_list

    input_list, key_list = _check_and_evaluate(input_list, key_list)

    for attack_key in self.attacks:
        attack = self.attacks[attack_key]
        if attack.num_attempts > 1:
            stats = [stat() for stat in self.batch_stats]
        else:
            stats = [Mean()]
        batch_summarizers[attack.name] = Summarizer(stats)
        # Pull per-attack extra args (e.g. targets) out of kwargs.
        additional_attack_args = {}
        for key in attack.additional_args:
            if key not in kwargs:
                warnings.warn(
                    f"Additional sample arg {key} not provided for {attack_key}"
                )
            else:
                additional_attack_args[key] = kwargs[key]

        for _ in range(attack.num_attempts):
            if attack.apply_before_preproc:
                attacked_inp = attack.attack_fn(
                    inputs, **additional_attack_args, **attack.attack_kwargs
                )
                preproc_attacked_inp = (
                    self.preproc_fn(attacked_inp)
                    if self.preproc_fn
                    else attacked_inp
                )
            else:
                preproc_attacked_inp = attack.attack_fn(
                    preproc_input,
                    **additional_attack_args,
                    **attack.attack_kwargs,
                )

            input_list.append(preproc_attacked_inp)
            key_list.append(attack.name)

            input_list, key_list = _check_and_evaluate(input_list, key_list)

    if len(input_list) > 0:
        # Final partial batch: expand args to its actual size.
        final_add_args = _expand_additional_forward_args(
            additional_forward_args, len(input_list)
        )
        self._evaluate_batch(
            input_list, final_add_args, key_list, batch_summarizers, kwargs
        )
    return self._parse_and_update_results(batch_summarizers)
def _attribute(
    self,
    inputs: Tuple[Tensor, ...],
    baselines: Tuple[Union[Tensor, int, float], ...],
    target: TargetType = None,
    additional_forward_args: Any = None,
    n_steps: int = 50,
    method: str = "gausslegendre",
    attribute_to_layer_input: bool = False,
    step_sizes_and_alphas: Union[None, Tuple[List[float], List[float]]] = None,
) -> Union[Tensor, Tuple[Tensor, ...]]:
    """
    Layer-conductance core: approximates conductance by multiplying layer
    gradients with the finite differences of consecutive layer evaluations
    along the baseline->input path (n_steps + 1 interpolation points give
    n_steps differences), then sums across steps.
    """
    num_examples = inputs[0].shape[0]
    if step_sizes_and_alphas is None:
        # Retrieve scaling factors for specified approximation method.
        # n_steps + 1 points are needed to form n_steps finite differences.
        step_sizes_func, alphas_func = approximation_parameters(method)
        alphas = alphas_func(n_steps + 1)
    else:
        # Step sizes are unused here; the finite differences of the layer
        # evaluations already incorporate the step size.
        _, alphas = step_sizes_and_alphas

    # Compute scaled inputs from baseline to final input.
    scaled_features_tpl = tuple(
        torch.cat(
            [baseline + alpha * (input - baseline) for alpha in alphas], dim=0
        ).requires_grad_()
        for input, baseline in zip(inputs, baselines)
    )

    additional_forward_args = _format_additional_forward_args(
        additional_forward_args
    )
    # apply number of steps to additional forward args
    # currently, number of steps is applied only to additional forward arguments
    # that are nd-tensors. It is assumed that the first dimension is
    # the number of batches.
    # dim -> (#examples * #steps x additional_forward_args[0].shape[1:], ...)
    input_additional_args = (
        _expand_additional_forward_args(additional_forward_args, n_steps + 1)
        if additional_forward_args is not None
        else None
    )
    expanded_target = _expand_target(target, n_steps + 1)

    # Conductance Gradients - Returns gradient of output with respect to
    # hidden layer and hidden layer evaluated at each input.
    (layer_gradients, layer_evals,) = compute_layer_gradients_and_eval(
        forward_fn=self.forward_func,
        layer=self.layer,
        inputs=scaled_features_tpl,
        additional_forward_args=input_additional_args,
        target_ind=expanded_target,
        device_ids=self.device_ids,
        attribute_to_layer_input=attribute_to_layer_input,
    )

    # Compute differences between consecutive evaluations of layer_eval.
    # This approximates the total input gradient of each step multiplied
    # by the step size.
    grad_diffs = tuple(
        layer_eval[num_examples:] - layer_eval[:-num_examples]
        for layer_eval in layer_evals
    )

    # Element-wise multiply gradient of output with respect to hidden layer
    # and summed gradients with respect to input (chain rule) and sum
    # across stepped inputs.
    attributions = tuple(
        _reshape_and_sum(
            grad_diff * layer_gradient[:-num_examples],
            n_steps,
            num_examples,
            layer_eval.shape[1:],
        )
        for layer_gradient, layer_eval, grad_diff in zip(
            layer_gradients, layer_evals, grad_diffs
        )
    )
    return _format_output(len(attributions) > 1, attributions)