def _assert_univariable(
    self, type: str, approximation_method: str = "gausslegendre"
) -> None:
    """Evaluate attributions of ``BasicModel`` on three one-element inputs.

    Every case is forwarded to ``_compute_attribution_and_evaluate`` together
    with the caller's ``type`` and ``approximation_method``.
    """
    model = BasicModel()
    # (input, baseline) pairs; note the last baseline is a bare float rather
    # than a tensor.
    cases = (
        (torch.tensor([1.0], requires_grad=True), torch.tensor([0.0])),
        (torch.tensor([0.0]), torch.tensor([0.0])),
        (torch.tensor([-1.0], requires_grad=True), 0.00001),
    )
    for case_input, case_baseline in cases:
        self._compute_attribution_and_evaluate(
            model,
            case_input,
            case_baseline,
            type=type,
            approximation_method=approximation_method,
        )
def test_attack_random_start(self) -> None:
    """Run PGD with ``random_start=True`` and bound the resulting shift by 0.25."""
    model = BasicModel()
    original = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
    attack = PGD(model)
    # The same 0.25 handed to ``perturb`` doubles as the tolerance below.
    limit = 0.25

    # No explicit ``norm`` argument: compare against the untouched values
    # with ``delta=limit`` in "max" mode.
    plain_result = attack.perturb(original, limit, 0.1, 0, 4, random_start=True)
    assertTensorAlmostEqual(
        self,
        plain_result,
        [[2.0, -9.0, 9.0, 1.0, -3.0]],
        delta=limit,
        mode="max",
    )

    # ``norm="L2"``: the norm of the difference to the original must not
    # exceed the limit.
    l2_result = attack.perturb(
        original, limit, 0.1, 0, 4, norm="L2", random_start=True
    )
    shift = (l2_result - original).squeeze()
    shift_norm = torch.norm(shift).numpy()
    self.assertLessEqual(shift_norm, limit)
def test_attack_targeted(self) -> None:
    """Targeted PGD on a four-element row; only the last entry is expected to move."""
    model = BasicModel()
    original = torch.tensor([[9.0, 10.0, -6.0, -1.0]], requires_grad=True)
    attack = PGD(model)

    result = attack.perturb(original, 0.2, 0.1, 3, 3, targeted=True)

    # Compare as flat Python lists.
    actual = torch.flatten(result).tolist()
    expected = [9.0, 10.0, -6.0, -1.2]
    assertArraysAlmostEqual(actual, expected, delta=0.01)
def test_attack_label_listtuple(self) -> None:
    """FGSM check where the labels are given as a list of index tuples."""
    model = BasicModel()
    batch = torch.tensor(
        [
            [[4.0, 2.0], [-1.0, -2.0]],
            [[3.0, -4.0], [10.0, 5.0]],
        ],
        requires_grad=True,
    )
    labels: List[Tuple[int, ...]] = [(1, 1), (0, 1)]
    # Expected values in flattened order; two entries differ from the input
    # by 0.1.
    expected = [4.0, 2.0, -1.0, -1.9, 3.0, -3.9, 10.0, 5.0]
    self._FGSM_assert(model, batch, labels, 0.1, expected)
def test_attack_label_tuple(self) -> None:
    """FGSM check where a single index tuple serves as the label."""
    model = BasicModel()
    batch = torch.tensor(
        [
            [[4.0, 2.0], [-1.0, -2.0]],
            [[3.0, -4.0], [10.0, 5.0]],
        ],
        requires_grad=True,
    )
    label = (0, 1)
    # Expected values in flattened order; one entry differs from the input
    # by 0.1.
    expected = [4.0, 2.0, -1.0, -2.0, 3.0, -3.9, 10.0, 5.0]
    self._FGSM_assert(model, batch, label, 0.1, expected)
def test_attack_targeted(self) -> None:
    """Targeted FGSM on a single four-element row."""
    model = BasicModel()
    original = torch.tensor([[9.0, 10.0, -6.0, -1.0]])
    expected = [[9.0, 10.0, -6.0, -1.2]]
    self._FGSM_assert(model, original, 3, 0.2, expected, targeted=True)
def test_gradient_basic_2(self) -> None:
    """Compute gradients for a 1x1 input whose ``.grad`` was set beforehand."""
    model = BasicModel()
    inp = torch.tensor([[-3.0]], requires_grad=True)
    preset_grad = torch.tensor([[14.0]])
    inp.grad = preset_grad

    computed = compute_gradients(model, inp)[0]
    assertArraysAlmostEqual(computed.squeeze(0).tolist(), [1.0], delta=0.01)

    # The pre-assigned ``.grad`` must come through untouched (exact match).
    assertArraysAlmostEqual(inp.grad.squeeze(0).tolist(), [14.0], delta=0.0)
def test_simple_ablation_int_to_int_nt(self) -> None:
    """Feature ablation wrapped in ``NoiseTunnel`` with a tiny ``stdevs`` value."""
    base_algo = FeatureAblation(BasicModel())
    tunnelled_algo = NoiseTunnel(base_algo)
    # Integer literals converted to float before being handed over.
    int_values = torch.tensor([[-3, 1, 2]])
    inp = int_values.float()
    expected = [[-3.0, 0.0, 0.0]]
    self._ablation_test_assert(
        tunnelled_algo,
        inp,
        expected,
        perturbations_per_eval=(1, 2, 3),
        stdevs=1e-10,
    )
def test_attack_nontargeted(self) -> None:
    """Non-targeted PGD on a five-element row, compared as a flat list."""
    model = BasicModel()
    original = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
    attack = PGD(model)

    result = attack.perturb(original, 0.25, 0.1, 2, 4)

    actual = torch.flatten(result).tolist()
    expected = [2.0, -9.0, 9.0, 1.0, -2.8]
    assertArraysAlmostEqual(actual, expected, delta=0.01)
def test_simple_ablation_int_to_float(self) -> None:
    """Ablation over an integer input whose forward output is cast to float."""
    net = BasicModel()

    def wrapper_func(inp):
        # Forward through the model, then convert the output to float.
        output = net(inp)
        return output.float()

    int_input = torch.tensor([[-3, 1, 2]])
    expected = [[-3.0, 0.0, 0.0]]
    self._ablation_test_assert(
        wrapper_func,
        int_input,
        expected,
        perturbations_per_eval=(1, 2, 3),
    )
def test_gradient_target_tuple(self) -> None:
    """Gradients for a 2x2x2 input with ``target_ind=(0, 1)``."""
    model = BasicModel()
    batch = torch.tensor(
        [
            [[4.0, 2.0], [-1.0, -2.0]],
            [[3.0, -4.0], [10.0, 5.0]],
        ],
        requires_grad=True,
    )

    first_grad = compute_gradients(model, batch, target_ind=(0, 1))[0]

    # Flattened, a single position is expected to carry a gradient of 1.
    actual = torch.flatten(first_grad).tolist()
    expected = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    assertArraysAlmostEqual(actual, expected, delta=0.01)
def test_attack_3dimensional_input(self) -> None:
    """PGD on a 2x2x2 input with a tuple target, compared as a flat list."""
    model = BasicModel()
    batch = torch.tensor(
        [
            [[4.0, 2.0], [-1.0, -2.0]],
            [[3.0, -4.0], [10.0, 5.0]],
        ],
        requires_grad=True,
    )
    attack = PGD(model)

    result = attack.perturb(batch, 0.25, 0.1, 3, (0, 1))

    actual = torch.flatten(result).tolist()
    expected = [4.0, 2.0, -1.0, -2.0, 3.0, -3.75, 10.0, 5.0]
    assertArraysAlmostEqual(actual, expected, delta=0.01)
def test_attack_nontargeted(self) -> None:
    """Non-targeted PGD on a five-element row, compared tensor-wise."""
    model = BasicModel()
    original = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
    attack = PGD(model)

    result = attack.perturb(original, 0.25, 0.1, 2, 4)

    expected = [[2.0, -9.0, 9.0, 1.0, -2.8]]
    assertTensorAlmostEqual(self, result, expected, delta=0.01, mode="max")
def test_attack_targeted(self) -> None:
    """Targeted PGD on a four-element row, compared tensor-wise."""
    model = BasicModel()
    original = torch.tensor([[9.0, 10.0, -6.0, -1.0]], requires_grad=True)
    attack = PGD(model)

    result = attack.perturb(original, 0.2, 0.1, 3, 3, targeted=True)

    expected = [[9.0, 10.0, -6.0, -1.2]]
    assertTensorAlmostEqual(self, result, expected, delta=0.01, mode="max")
def test_attack_bound(self) -> None:
    """Targeted FGSM with ``lower_bound=-5.0`` and ``upper_bound=5.0``."""
    model = BasicModel()
    original = torch.tensor([[9.0, 10.0, -6.0, -1.0]])
    # The first three expected entries sit exactly on one of the bounds.
    expected = [[5.0, 5.0, -5.0, -1.2]]
    self._FGSM_assert(
        model,
        original,
        3,
        0.2,
        expected,
        targeted=True,
        lower_bound=-5.0,
        upper_bound=5.0,
    )
def test_attack_3dimensional_input(self) -> None:
    """PGD on a 2x2x2 input with a tuple target, compared tensor-wise."""
    model = BasicModel()
    batch = torch.tensor(
        [
            [[4.0, 2.0], [-1.0, -2.0]],
            [[3.0, -4.0], [10.0, 5.0]],
        ],
        requires_grad=True,
    )
    attack = PGD(model)

    result = attack.perturb(batch, 0.25, 0.1, 3, (0, 1))

    # Same nesting as the input; one entry is expected to differ.
    expected = [
        [[4.0, 2.0], [-1.0, -2.0]],
        [[3.0, -3.75], [10.0, 5.0]],
    ]
    assertTensorAlmostEqual(self, result, expected, delta=0.01, mode="max")
def test_minimal_pert_basic_linear(self) -> None:
    """Search ``add_arg`` over 0..1000 in steps of 1 and check the reported value."""
    model = BasicModel()

    def offset_forward(x):
        # Model output plus 1e-6 on its first entry
        # (presumably a tie-breaker -- TODO confirm).
        return model(x) + torch.tensor([[0.000001, 0.0, 0.0, 0.0, 0.0]])

    inp = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
    searcher = MinParamPerturbation(
        forward_func=offset_forward,
        attack=inp_subtract,
        arg_name="add_arg",
        arg_min=0.0,
        arg_max=1000.0,
        arg_step=1.0,
    )

    target_inp, pert = searcher.evaluate(
        inp, target=0, attack_kwargs={"ind": 0}
    )

    self.assertAlmostEqual(cast(float, pert), 2.0)
    expected_inp = torch.tensor([[0.0, -9.0, 9.0, 1.0, -3.0]])
    assertTensorAlmostEqual(self, target_inp, expected_inp)
def layer_method_with_input_layer_patches(
    self,
    layer_method_class: Callable,
    equiv_method_class: Callable,
    multi_layer: bool,
) -> None:
    """Check a layer method on wrapped input layers against an equivalent method.

    Args:
        layer_method_class: Called as ``layer_method_class(model, layer=...)``.
        equiv_method_class: Called as ``equiv_method_class(model)``.
        multi_layer: Selects the four-input model and passes all input layers
            as a list; otherwise ``BasicModel`` with a single layer is used.
    """
    if multi_layer:
        raw_model = BasicModel_MultiLayer_TrueMultiInput()
        input_names = ["x1", "x2", "x3", "x4"]
    else:
        raw_model = BasicModel()
        input_names = ["input"]
    model = ModelInputWrapper(raw_model)

    layers = [model.input_maps[name] for name in input_names]
    target_layer = layers if multi_layer else layers[0]
    layer_method = layer_method_class(model, layer=target_layer)
    equivalent_method = equiv_method_class(model)

    # One random (5, 3) tensor per input name, with all-zero baselines.
    inputs = tuple(torch.rand(5, 3) for _ in input_names)
    baseline = tuple(torch.zeros(5, 3) for _ in input_names)

    # Baselines are only passed when the equivalent method's undecorated
    # ``attribute`` signature lists a ``baselines`` argument.
    attribute_params = inspect.getfullargspec(
        equivalent_method.attribute.__wrapped__
    ).args
    args_to_use = [inputs]
    if "baselines" in attribute_params:
        args_to_use += [baseline]

    a1 = layer_method.attribute(*args_to_use, target=0)
    a2 = layer_method.attribute(
        *args_to_use, target=0, attribute_to_layer_input=True
    )
    real_attributions = equivalent_method.attribute(*args_to_use, target=0)

    # Bring every result into tuple form before comparing; ``a2`` is wrapped
    # whenever ``a1`` is.
    if not isinstance(a1, tuple):
        a1, a2 = (a1,), (a2,)
    if not isinstance(real_attributions, tuple):
        real_attributions = (real_attributions,)

    assertTensorTuplesAlmostEqual(self, a1, a2)
    assertTensorTuplesAlmostEqual(self, a1, real_attributions)
def _get_basic_config() -> Tuple[Module, Tensor, Tensor, Any]:
    """Return ``(BasicModel(), input, gradients, None)`` as a fixed configuration."""
    sample_input = torch.tensor(
        [1.0, 2.0, 3.0, 0.0, -1.0, 7.0], requires_grad=True
    )
    # Gradient values worked out by hand ahead of time.
    precomputed_grads = torch.tensor([-0.0, -0.0, -0.0, 1.0, 1.0, -0.0])
    return BasicModel(), sample_input, precomputed_grads, None
def test_gradient_basic_2(self) -> None:
    """Compute gradients for a 1x1 input of -3.0 and expect 1.0."""
    model = BasicModel()
    inp = torch.tensor([[-3.0]], requires_grad=True)

    first_grad = compute_gradients(model, inp)[0]

    actual = first_grad.squeeze(0).tolist()
    assertArraysAlmostEqual(actual, [1.0], delta=0.01)
def test_simple_ablation_int_to_int(self) -> None:
    """Ablation assertion with the bare model and an integer input."""
    net = BasicModel()
    int_input = torch.tensor([[-3, 1, 2]])
    expected = [[-3, 0, 0]]
    self._ablation_test_assert(
        net,
        int_input,
        expected,
        perturbations_per_eval=(1, 2, 3),
    )
def test_attack_comparator_basic(self) -> None:
    """Register three attacks, evaluate two one-row batches, then check the summary."""
    model = BasicModel()

    def offset_forward(x):
        # Model output plus 1e-6 on its first entry
        # (presumably a tie-breaker -- TODO confirm).
        return model(x) + torch.tensor([[0.000001, 0.0, 0.0, 0.0, 0.0]])

    comparator = AttackComparator(
        forward_func=offset_forward,
        metric=tuple_metric,
    )
    comparator.add_attack(
        drop_column_perturb,
        name="first_column_perturb",
        attack_kwargs={"column": 0},
    )
    comparator.add_attack(
        drop_column_perturb,
        name="last_column_perturb",
        attack_kwargs={"column": -1},
    )
    # No explicit name is given for this attack; the expectations below look
    # it up under "FGSM".
    comparator.add_attack(
        FGSM(model),
        attack_kwargs={"epsilon": 0.5},
        additional_attack_arg_names=["target"],
    )

    # --- first batch, target 0 ---
    first_inp = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
    first_results = comparator.evaluate(first_inp, target=0, named_tuple=True)
    expected_first = {
        "Original": (1.0, 1.0),
        "first_column_perturb": {"mean": (0.0, 0.0)},
        "last_column_perturb": {"mean": (1.0, 1.0)},
        "FGSM": {"mean": (1.0, 1.0)},
    }
    self._compare_results(first_results, expected_first)

    # --- second batch, target 4 ---
    second_inp = torch.tensor([[1.0, 2.0, -3.0, 4.0, -5.0]])
    second_results = comparator.evaluate(second_inp, target=4, named_tuple=True)
    expected_second = {
        "Original": (0.0, -5.0),
        "first_column_perturb": {"mean": (0.0, -5.0)},
        "last_column_perturb": {"mean": (0.0, 0.0)},
        "FGSM": {"mean": (0.0, -4.5)},
    }
    self._compare_results(second_results, expected_second)

    # --- summary: each expected mean is the average of the two batches above ---
    expected_summary = {
        "Original": {"mean": (0.5, -2.0)},
        "first_column_perturb": {"mean": (0.0, -2.5)},
        "last_column_perturb": {"mean": (0.5, 0.5)},
        "FGSM": {"mean": (0.5, -1.75)},
    }
    self._compare_results(comparator.summary(), expected_summary)
def test_simple_ablation_int_to_int(self) -> None:
    """Ablation assertion with ``FeatureAblation`` and an integer input."""
    model = BasicModel()
    ablation_algo = FeatureAblation(model)
    int_input = torch.tensor([[-3, 1, 2]])
    expected = [[-3, 0, 0]]
    self._ablation_test_assert(
        ablation_algo,
        int_input,
        expected,
        perturbations_per_eval=(1, 2, 3),
    )
def test_attack_nontargeted(self) -> None:
    """Non-targeted FGSM on a single five-element row."""
    model = BasicModel()
    original = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
    # Only the second entry is expected to change, by 0.1.
    expected = [[2.0, -8.9, 9.0, 1.0, -3.0]]
    self._FGSM_assert(model, original, 1, 0.1, expected)