def _assert_univariable(
     self, type: str, approximation_method: str = "gausslegendre"
 ) -> None:
     model = BasicModel()
     self._compute_attribution_and_evaluate(
         model,
         torch.tensor([1.0], requires_grad=True),
         torch.tensor([0.0]),
         type=type,
         approximation_method=approximation_method,
     )
     self._compute_attribution_and_evaluate(
         model,
         torch.tensor([0.0]),
         torch.tensor([0.0]),
         type=type,
         approximation_method=approximation_method,
     )
     self._compute_attribution_and_evaluate(
         model,
         torch.tensor([-1.0], requires_grad=True),
         0.00001,
         type=type,
         approximation_method=approximation_method,
     )
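Every snippet in this listing drives the same toy network from Captum's test helpers. Its definition is not reproduced on this page; a minimal sketch consistent with the gradients and attributions asserted below is the following (the exact body, f(x) = 1 - relu(1 - x), is an assumption inferred from the expected values):

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicModel(nn.Module):
    # Parameter-free toy model: f(x) = 1 - relu(1 - x).
    # The output equals the input for x <= 1 and saturates at 1 above,
    # so every elementwise gradient is exactly 1 (linear region) or 0
    # (saturated region), keeping the expected values hand-checkable.
    def forward(self, input: torch.Tensor) -> torch.Tensor:
        return 1 - F.relu(1 - input)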
Example #2
 def test_attack_random_start(self) -> None:
     model = BasicModel()
     input = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
     adv = PGD(model)
     perturbed_input = adv.perturb(input,
                                   0.25,
                                   0.1,
                                   0,
                                   4,
                                   random_start=True)
     assertTensorAlmostEqual(
         self,
         perturbed_input,
         [[2.0, -9.0, 9.0, 1.0, -3.0]],
         delta=0.25,
         mode="max",
     )
     perturbed_input = adv.perturb(input,
                                   0.25,
                                   0.1,
                                   0,
                                   4,
                                   norm="L2",
                                   random_start=True)
     norm = torch.norm((perturbed_input - input).squeeze()).numpy()
     self.assertLessEqual(norm, 0.25)
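PGD.perturb is called positionally above; with Captum's keyword names spelled out, the first call reads as follows. Since step_num=0 the attack takes no gradient steps, so the random start is the entire perturbation, which is why the test only bounds the Linf (or, in the second call, L2) distance by the radius:

# Keyword form of the first perturb call above (argument names follow
# Captum's PGD.perturb signature):
perturbed_input = adv.perturb(
    inputs=input,
    radius=0.25,       # norm bound on the total perturbation
    step_size=0.1,     # per-iteration step (irrelevant when step_num=0)
    step_num=0,        # zero PGD iterations: only the random start applies
    target=4,
    random_start=True,
)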
Example #3
 def test_attack_targeted(self) -> None:
     model = BasicModel()
     input = torch.tensor([[9.0, 10.0, -6.0, -1.0]], requires_grad=True)
     adv = PGD(model)
     perturbed_input = adv.perturb(input, 0.2, 0.1, 3, 3, targeted=True)
     assertArraysAlmostEqual(torch.flatten(perturbed_input).tolist(),
                             [9.0, 10.0, -6.0, -1.2],
                             delta=0.01)
Example #4
 def test_attack_label_listtuple(self) -> None:
     model = BasicModel()
     input = torch.tensor(
         [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -4.0], [10.0, 5.0]]],
         requires_grad=True)
     labels: List[Tuple[int, ...]] = [(1, 1), (0, 1)]
     self._FGSM_assert(model, input, labels, 0.1,
                       [4.0, 2.0, -1.0, -1.9, 3.0, -3.9, 10.0, 5.0])
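The FGSM examples call a private _FGSM_assert helper whose body is not part of this listing. A hypothetical reconstruction consistent with every call site looks like this (the helper itself is guesswork; FGSM's lower_bound/upper_bound constructor arguments and perturb(inputs, epsilon, target, targeted=...) follow Captum's public API):

 def _FGSM_assert(
         self, model, inputs, target, epsilon, answer,
         targeted=False, lower_bound=float("-inf"),
         upper_bound=float("inf"),
 ) -> None:
     # Hypothetical helper: build the attack, run it, and compare the
     # flattened result with the expected values.
     adv = FGSM(model, lower_bound=lower_bound, upper_bound=upper_bound)
     perturbed = adv.perturb(inputs, epsilon, target, targeted=targeted)
     assertArraysAlmostEqual(
         torch.flatten(perturbed).tolist(),
         torch.flatten(torch.tensor(answer)).tolist(),
         delta=0.01,
     )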
Example #5
 def test_attack_label_tuple(self) -> None:
     model = BasicModel()
     input = torch.tensor(
         [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -4.0], [10.0, 5.0]]],
         requires_grad=True)
     labels = (0, 1)
     self._FGSM_assert(model, input, labels, 0.1,
                       [4.0, 2.0, -1.0, -2.0, 3.0, -3.9, 10.0, 5.0])
Example #6
 def test_attack_targeted(self) -> None:
     model = BasicModel()
     input = torch.tensor([[9.0, 10.0, -6.0, -1.0]])
     self._FGSM_assert(model,
                       input,
                       3,
                       0.2, [[9.0, 10.0, -6.0, -1.2]],
                       targeted=True)
Example #7
 def test_gradient_basic_2(self) -> None:
     model = BasicModel()
     input = torch.tensor([[-3.0]], requires_grad=True)
     input.grad = torch.tensor([[14.0]])
     grads = compute_gradients(model, input)[0]
     assertArraysAlmostEqual(grads.squeeze(0).tolist(), [1.0], delta=0.01)
     # Verify grad attribute is not altered
     assertArraysAlmostEqual(input.grad.squeeze(0).tolist(), [14.0],
                             delta=0.0)
Example #8
 def test_simple_ablation_int_to_int_nt(self) -> None:
     ablation_algo = NoiseTunnel(FeatureAblation(BasicModel()))
     inp = torch.tensor([[-3, 1, 2]]).float()
     self._ablation_test_assert(
         ablation_algo,
         inp,
         [[-3.0, 0.0, 0.0]],
         perturbations_per_eval=(1, 2, 3),
         stdevs=1e-10,
     )
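Here stdevs is consumed by NoiseTunnel.attribute, not by FeatureAblation: the tunnel draws noisy copies of the input, attributes each with the wrapped ablation, and averages. With stdevs=1e-10 the noise is negligible, so the smoothed result collapses to plain ablation. Stripped of the test helper, the core call is roughly the following (target=0 mirrors what the helper presumably passes; that default is an assumption):

from captum.attr import FeatureAblation, NoiseTunnel

nt = NoiseTunnel(FeatureAblation(BasicModel()))
attr = nt.attribute(
    torch.tensor([[-3.0, 1.0, 2.0]]),
    target=0,                  # assumed; the helper's target is not shown
    stdevs=1e-10,              # near-zero noise: reduces to plain ablation
    perturbations_per_eval=1,
)
# attr is approximately [[-3.0, 0.0, 0.0]]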
Example #9
 def test_attack_nontargeted(self) -> None:
     model = BasicModel()
     input = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
     adv = PGD(model)
     perturbed_input = adv.perturb(input, 0.25, 0.1, 2, 4)
     assertArraysAlmostEqual(
         torch.flatten(perturbed_input).tolist(),
         [2.0, -9.0, 9.0, 1.0, -2.8],
         delta=0.01,
     )
Example #10
    def test_simple_ablation_int_to_float(self) -> None:
        net = BasicModel()

        def wrapper_func(inp):
            return net(inp).float()

        inp = torch.tensor([[-3, 1, 2]])
        self._ablation_test_assert(
            wrapper_func, inp, [[-3.0, 0.0, 0.0]], perturbations_per_eval=(1, 2, 3)
        )
Example #11
 def test_gradient_target_tuple(self) -> None:
     model = BasicModel()
     input = torch.tensor(
         [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -4.0], [10.0, 5.0]]],
         requires_grad=True)
     grads = compute_gradients(model, input, target_ind=(0, 1))[0]
     assertArraysAlmostEqual(
         torch.flatten(grads).tolist(),
         [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
         delta=0.01,
     )
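target_ind=(0, 1) selects the output element at index (0, 1) of every example in the batch. A hand check with plain autograd (assuming the BasicModel sketch above) reproduces the expected gradient:

model = BasicModel()
inp = torch.tensor(
    [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -4.0], [10.0, 5.0]]],
    requires_grad=True)
model(inp)[:, 0, 1].sum().backward()
# Of the two selected inputs, 2.0 lies in the saturated region
# (x > 1, gradient 0) and -4.0 in the linear region (gradient 1),
# which yields the single 1.0 at flat index 5:
# inp.grad.flatten() == [0., 0., 0., 0., 0., 1., 0., 0.]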
Example #12
 def test_attack_3dimensional_input(self) -> None:
     model = BasicModel()
     input = torch.tensor(
         [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -4.0], [10.0, 5.0]]],
         requires_grad=True)
     adv = PGD(model)
     perturbed_input = adv.perturb(input, 0.25, 0.1, 3, (0, 1))
     assertArraysAlmostEqual(
         torch.flatten(perturbed_input).tolist(),
         [4.0, 2.0, -1.0, -2.0, 3.0, -3.75, 10.0, 5.0],
         delta=0.01,
     )
Example #13
 def test_attack_nontargeted(self) -> None:
     model = BasicModel()
     input = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
     adv = PGD(model)
     perturbed_input = adv.perturb(input, 0.25, 0.1, 2, 4)
     assertTensorAlmostEqual(
         self,
         perturbed_input,
         [[2.0, -9.0, 9.0, 1.0, -2.8]],
         delta=0.01,
         mode="max",
     )
Example #14
 def test_attack_targeted(self) -> None:
     model = BasicModel()
     input = torch.tensor([[9.0, 10.0, -6.0, -1.0]], requires_grad=True)
     adv = PGD(model)
     perturbed_input = adv.perturb(input, 0.2, 0.1, 3, 3, targeted=True)
     assertTensorAlmostEqual(
         self,
         perturbed_input,
         [[9.0, 10.0, -6.0, -1.2]],
         delta=0.01,
         mode="max",
     )
Example #15
 def test_attack_bound(self) -> None:
     model = BasicModel()
     input = torch.tensor([[9.0, 10.0, -6.0, -1.0]])
     self._FGSM_assert(
         model,
         input,
         3,
         0.2,
         [[5.0, 5.0, -5.0, -1.2]],
         targeted=True,
         lower_bound=-5.0,
         upper_bound=5.0,
     )
Example #16
 def test_attack_3dimensional_input(self) -> None:
     model = BasicModel()
     input = torch.tensor(
         [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -4.0], [10.0, 5.0]]],
         requires_grad=True)
     adv = PGD(model)
     perturbed_input = adv.perturb(input, 0.25, 0.1, 3, (0, 1))
     assertTensorAlmostEqual(
         self,
         perturbed_input,
         [[[4.0, 2.0], [-1.0, -2.0]], [[3.0, -3.75], [10.0, 5.0]]],
         delta=0.01,
         mode="max",
     )
Example #17
 def test_minimal_pert_basic_linear(self) -> None:
     model = BasicModel()
     inp = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
     minimal_pert = MinParamPerturbation(
         forward_func=lambda x: model(x) + torch.tensor(
             [[0.000001, 0.0, 0.0, 0.0, 0.0]]),
         attack=inp_subtract,
         arg_name="add_arg",
         arg_min=0.0,
         arg_max=1000.0,
         arg_step=1.0,
     )
     target_inp, pert = minimal_pert.evaluate(inp,
                                              target=0,
                                              attack_kwargs={"ind": 0})
     self.assertAlmostEqual(cast(float, pert), 2.0)
     assertTensorAlmostEqual(self, target_inp,
                             torch.tensor([[0.0, -9.0, 9.0, 1.0, -3.0]]))
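inp_subtract is a module-level helper of the test file and is not shown here. A hypothetical reconstruction consistent with the asserted result (the smallest add_arg that flips the prediction is 2.0, which turns the first feature from 2.0 into 0.0):

def inp_subtract(inp, ind=0, add_arg=0.0):
    # Hypothetical attack: subtract add_arg from the feature at index
    # ind. MinParamPerturbation scans add_arg over [0, 1000] in steps
    # of 1.0 and returns the smallest value that changes the model's
    # prediction away from target=0.
    shift = torch.zeros_like(inp)
    shift[:, ind] = add_arg
    return inp - shift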
Example #18
    def layer_method_with_input_layer_patches(
        self,
        layer_method_class: Callable,
        equiv_method_class: Callable,
        multi_layer: bool,
    ) -> None:
        model = (BasicModel_MultiLayer_TrueMultiInput()
                 if multi_layer else BasicModel())

        input_names = ["x1", "x2", "x3", "x4"] if multi_layer else ["input"]
        model = ModelInputWrapper(model)

        layers = [model.input_maps[inp] for inp in input_names]
        layer_method = layer_method_class(
            model, layer=layers if multi_layer else layers[0])
        equivalent_method = equiv_method_class(model)

        inputs = tuple(torch.rand(5, 3) for _ in input_names)
        baseline = tuple(torch.zeros(5, 3) for _ in input_names)

        args = inspect.getfullargspec(
            equivalent_method.attribute.__wrapped__).args

        args_to_use = [inputs]
        if "baselines" in args:
            args_to_use += [baseline]

        a1 = layer_method.attribute(*args_to_use, target=0)
        a2 = layer_method.attribute(*args_to_use,
                                    target=0,
                                    attribute_to_layer_input=True)

        real_attributions = equivalent_method.attribute(*args_to_use, target=0)

        if not isinstance(a1, tuple):
            a1 = (a1, )
            a2 = (a2, )

        if not isinstance(real_attributions, tuple):
            real_attributions = (real_attributions, )

        assertTensorTuplesAlmostEqual(self, a1, a2)
        assertTensorTuplesAlmostEqual(self, a1, real_attributions)
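a1 and a2 are asserted equal because ModelInputWrapper routes each forward argument through its own identity layer, making that layer's input and output the same tensor. The wrapping pattern in isolation (the import path below is an assumption about Captum's internal layout; input_maps is the attribute the test itself indexes):

# Import path is an assumption about Captum's module structure.
from captum.attr._utils.input_layer_wrapper import ModelInputWrapper

wrapped = ModelInputWrapper(BasicModel())
# Every named forward argument gets its own identity layer, retrievable
# by name and usable as the target of a layer-attribution method.
identity_layer = wrapped.input_maps["input"]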
Example #19
def _get_basic_config() -> Tuple[Module, Tensor, Tensor, Any]:
    input = torch.tensor([1.0, 2.0, 3.0, 0.0, -1.0, 7.0], requires_grad=True)
    # manually precomputed gradients
    grads = torch.tensor([-0.0, -0.0, -0.0, 1.0, 1.0, -0.0])
    return BasicModel(), input, grads, None
Example #20
 def test_gradient_basic_2(self) -> None:
     model = BasicModel()
     input = torch.tensor([[-3.0]], requires_grad=True)
     grads = compute_gradients(model, input)[0]
     assertArraysAlmostEqual(grads.squeeze(0).tolist(), [1.0], delta=0.01)
Example #21
 def test_simple_ablation_int_to_int(self) -> None:
     net = BasicModel()
     inp = torch.tensor([[-3, 1, 2]])
     self._ablation_test_assert(
         net, inp, [[-3, 0, 0]], perturbations_per_eval=(1, 2, 3)
     )
Example #22
    def test_attack_comparator_basic(self) -> None:
        model = BasicModel()
        inp = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
        attack_comp = AttackComparator(
            forward_func=lambda x: model(x) + torch.tensor(
                [[0.000001, 0.0, 0.0, 0.0, 0.0]]),
            metric=tuple_metric,
        )
        attack_comp.add_attack(
            drop_column_perturb,
            name="first_column_perturb",
            attack_kwargs={"column": 0},
        )
        attack_comp.add_attack(
            drop_column_perturb,
            name="last_column_perturb",
            attack_kwargs={"column": -1},
        )
        attack_comp.add_attack(
            FGSM(model),
            attack_kwargs={"epsilon": 0.5},
            additional_attack_arg_names=["target"],
        )
        batch_results = attack_comp.evaluate(inp, target=0, named_tuple=True)
        expected_first_results = {
            "Original": (1.0, 1.0),
            "first_column_perturb": {
                "mean": (0.0, 0.0)
            },
            "last_column_perturb": {
                "mean": (1.0, 1.0)
            },
            "FGSM": {
                "mean": (1.0, 1.0)
            },
        }
        self._compare_results(batch_results, expected_first_results)

        alt_inp = torch.tensor([[1.0, 2.0, -3.0, 4.0, -5.0]])

        second_batch_results = attack_comp.evaluate(alt_inp,
                                                    target=4,
                                                    named_tuple=True)
        expected_second_results = {
            "Original": (0.0, -5.0),
            "first_column_perturb": {
                "mean": (0.0, -5.0)
            },
            "last_column_perturb": {
                "mean": (0.0, 0.0)
            },
            "FGSM": {
                "mean": (0.0, -4.5)
            },
        }
        self._compare_results(second_batch_results, expected_second_results)

        expected_summary_results = {
            "Original": {
                "mean": (0.5, -2.0)
            },
            "first_column_perturb": {
                "mean": (0.0, -2.5)
            },
            "last_column_perturb": {
                "mean": (0.5, 0.5)
            },
            "FGSM": {
                "mean": (0.5, -1.75)
            },
        }
        self._compare_results(attack_comp.summary(), expected_summary_results)
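summary() reports per-attack means over the two evaluate calls above; written out as plain arithmetic over the expected batch results:

# Original:             ((1.0 + 0.0) / 2, (1.0 + -5.0) / 2) -> (0.5, -2.0)
# first_column_perturb: ((0.0 + 0.0) / 2, (0.0 + -5.0) / 2) -> (0.0, -2.5)
# last_column_perturb:  ((1.0 + 0.0) / 2, (1.0 +  0.0) / 2) -> (0.5,  0.5)
# FGSM:                 ((1.0 + 0.0) / 2, (1.0 + -4.5) / 2) -> (0.5, -1.75)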
Example #23
 def test_simple_ablation_int_to_int(self) -> None:
     ablation_algo = FeatureAblation(BasicModel())
     inp = torch.tensor([[-3, 1, 2]])
     self._ablation_test_assert(ablation_algo,
                                inp, [[-3, 0, 0]],
                                perturbations_per_eval=(1, 2, 3))
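_ablation_test_assert similarly wraps a direct FeatureAblation.attribute call; stripped of the helper, this configuration reduces to roughly the following (target=0 is again an assumption about the helper's default):

ablation_algo = FeatureAblation(BasicModel())
inp = torch.tensor([[-3, 1, 2]])
attr = ablation_algo.attribute(inp, target=0, perturbations_per_eval=1)
# Ablating feature 0 (default baseline 0) moves output[0] from -3 to 0,
# so its attribution is -3; features 1 and 2 do not affect output[0].
# attr == tensor([[-3, 0, 0]])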
Example #24
 def test_attack_nontargeted(self) -> None:
     model = BasicModel()
     input = torch.tensor([[2.0, -9.0, 9.0, 1.0, -3.0]])
     self._FGSM_assert(model, input, 1, 0.1, [[2.0, -8.9, 9.0, 1.0, -3.0]])