def test_binarization_attack(
    fmodel_and_data_ext_for_attacks: ModeAndDataAndDescription,
) -> None:
    """End-to-end check of BinarizationRefinementAttack on a thresholded model.

    Runs a contrast-reduction attack to get non-trivial adversarials, then
    verifies that the refinement attack (with an explicit threshold and with
    the threshold inferred from the model) never increases perturbation norms
    and shrinks at least one, and that invalid arguments raise ValueError.
    """
    # get a model with thresholding
    (fmodel, x, y), _, low_dimensional_input = fmodel_and_data_ext_for_attacks

    # binarization doesn't work well for imagenet models
    if not low_dimensional_input:
        pytest.skip()

    x = (x - fmodel.bounds.lower) / (fmodel.bounds.upper - fmodel.bounds.lower)
    fmodel = fmodel.transform_bounds((0, 1))
    fmodel = ThresholdingWrapper(fmodel, threshold=0.5)
    acc = accuracy(fmodel, x, y)
    assert acc > 0

    # find some adversarials and check that they are non-trivial
    attack = BinarySearchContrastReductionAttack(target=0)
    advs, _, _ = attack(fmodel, x, y, epsilons=None)
    assert accuracy(fmodel, advs, y) < acc

    # run the refinement attack with an explicit threshold ...
    _check_refinement(
        fmodel,
        x,
        y,
        advs,
        BinarizationRefinementAttack(threshold=0.5, included_in="upper"),
    )
    # ... and again with the threshold inferred from the model
    _check_refinement(
        fmodel, x, y, advs, BinarizationRefinementAttack(included_in="upper")
    )

    attack2 = BinarizationRefinementAttack(included_in="upper")
    with pytest.raises(ValueError, match="starting_points"):
        attack2(fmodel, x, y, epsilons=None)

    attack2 = BinarizationRefinementAttack(included_in="lower")
    with pytest.raises(ValueError, match="does not match"):
        attack2(fmodel, x, y, starting_points=advs, epsilons=None)

    attack2 = BinarizationRefinementAttack(included_in="invalid")  # type: ignore
    with pytest.raises(ValueError, match="expected included_in"):
        attack2(fmodel, x, y, starting_points=advs, epsilons=None)


def _check_refinement(fmodel, x, y, advs, refinement_attack) -> None:
    """Run refinement_attack from starting points advs and verify it refines.

    Asserts that the predicted classes are unchanged and that the L2
    perturbation norms never grow, with at least one strictly shrinking.
    """
    advs2, _, _ = refinement_attack(fmodel, x, y, starting_points=advs, epsilons=None)
    # make sure the predicted classes didn't change
    assert (fmodel(advs).argmax(axis=-1) == fmodel(advs2).argmax(axis=-1)).all()
    # make sure the perturbations didn't get larger and some got smaller
    norms1 = flatten(advs - x).norms.l2(axis=-1)
    norms2 = flatten(advs2 - x).norms.l2(axis=-1)
    assert (norms2 <= norms1).all()
    assert (norms2 < norms1).any()
def test_attack_noinit(binarized_bn_adversarial):
    """Without a starting point, the refinement attack must find nothing."""
    # NOTE(review): this name is redefined later in the file; pytest will only
    # collect the last definition — confirm which variant is intended to run.
    adversarial = binarized_bn_adversarial
    assert adversarial.perturbed is None
    refinement = BinarizationRefinementAttack()
    refinement(adversarial)
    assert adversarial.perturbed is None
def test_attack(binarized_bn_model, bn_criterion, bn_images, binarized_bn_labels):
    """Refine gradient-attack adversarials on a binarized model.

    Refinement must strictly shrink each finite distance, and every changed
    pixel of the refined input must sit exactly on the 0.5 threshold.
    """
    initial_attack = GradientAttack(binarized_bn_model, bn_criterion)
    initial_advs = initial_attack(bn_images, binarized_bn_labels, unpack=False)
    for initial in initial_advs:
        assert initial.perturbed is not None

    refinement = BinarizationRefinementAttack(binarized_bn_model, bn_criterion)
    starting_points = [{"starting_point": a.perturbed} for a in initial_advs]
    refined_advs = refinement(
        bn_images,
        binarized_bn_labels,
        unpack=False,
        individual_kwargs=starting_points,
    )
    for before, after in zip(initial_advs, refined_advs):
        assert after.distance.value < before.distance.value < np.inf
        clean = after.unperturbed
        perturbed = after.perturbed
        changed = perturbed[perturbed != clean]
        np.testing.assert_allclose(changed, 0.5)
def test_attack_wrong_arg(binarized_bn_adversarial):
    """An unknown value for `included_in` must raise a ValueError."""
    adversarial = binarized_bn_adversarial
    GradientAttack()(adversarial)
    refinement = BinarizationRefinementAttack()
    with pytest.raises(ValueError):
        refinement(adversarial, included_in='blabla')
def test_attack_fail(bn_adversarial):
    """On a model without thresholding, refinement must fail its internal check."""
    # NOTE(review): this name is redefined later in the file; pytest will only
    # collect the last definition — confirm which variant is intended to run.
    adversarial = bn_adversarial
    GradientAttack()(adversarial)
    assert adversarial is not None
    refinement = BinarizationRefinementAttack()
    with pytest.raises(AssertionError) as excinfo:
        refinement(adversarial)
    assert 'thresholding does not match' in str(excinfo.value)
def test_attack_wrong_arg(binarized_bn_model, bn_criterion, bn_images, binarized2_bn_labels):
    """An unknown `included_in` must raise ValueError in the batched interface."""
    initial_attack = GradientAttack(binarized_bn_model, bn_criterion)
    initial_advs = initial_attack(bn_images, binarized2_bn_labels, unpack=False)
    refinement = BinarizationRefinementAttack(binarized_bn_model, bn_criterion)
    starting_points = [{"starting_point": a.perturbed} for a in initial_advs]
    with pytest.raises(ValueError):
        refinement(
            bn_images,
            binarized2_bn_labels,
            unpack=False,
            individual_kwargs=starting_points,
            included_in="blabla",
        )
def test_attack_fail(bn_model, bn_criterion, bn_images, bn_labels):
    """On a non-binarized model, batched refinement must fail its threshold check."""
    initial_attack = GradientAttack(bn_model, bn_criterion)
    initial_advs = initial_attack(bn_images, bn_labels, unpack=False)
    for initial in initial_advs:
        assert initial is not None
    refinement = BinarizationRefinementAttack(bn_model, bn_criterion)
    with pytest.raises(AssertionError) as excinfo:
        refinement(
            bn_images,
            bn_labels,
            individual_kwargs=[
                {"starting_point": a.perturbed} for a in initial_advs
            ],
        )
    assert "threshold does not match" in str(excinfo.value)
def test_attack(binarized_bn_adversarial):
    """Refinement must shrink the distance and leave only threshold-valued changes."""
    # NOTE(review): this name is also defined earlier in the file; pytest will
    # only collect the last definition — confirm which variant should run.
    adversarial = binarized_bn_adversarial
    GradientAttack()(adversarial)
    distance_before = adversarial.distance.value
    BinarizationRefinementAttack()(adversarial)
    distance_after = adversarial.distance.value
    assert distance_after < distance_before < np.inf
    clean = adversarial.unperturbed
    perturbed = adversarial.perturbed
    changed = perturbed[perturbed != clean]
    np.testing.assert_allclose(changed, 0.5)
def test_attack2(binarized2_bn_adversarial):
    """Refinement with included_in='lower' on the inverted binarization model.

    After refinement the distance must strictly shrink, and every pixel that
    differs from the clean input must sit exactly on the 0.5 threshold.
    """
    adv = binarized2_bn_adversarial
    attack = GradientAttack()
    attack(adv)
    v1 = adv.distance.value
    attack = BinarizationRefinementAttack()
    attack(adv, included_in='lower')
    v2 = adv.distance.value
    assert v2 < v1 < np.inf
    # use the current attribute names `unperturbed`/`perturbed` instead of
    # the deprecated `original_image`/`image`, matching the sibling tests
    # in this file that use the same fixture types
    o = adv.unperturbed
    x = adv.perturbed
    d = x[x != o]
    np.testing.assert_allclose(d, 0.5)
def test_attack_sp(binarized_bn_adversarial):
    """Refinement through the starting_point keyword rather than an Adversarial."""
    adversarial = binarized_bn_adversarial
    GradientAttack()(adversarial)
    distance_before = adversarial.distance.value
    refinement = BinarizationRefinementAttack(adversarial._model)
    refined = refinement(
        adversarial.unperturbed,
        adversarial.original_class,
        starting_point=adversarial.perturbed,
        unpack=False,
    )
    assert refined.distance.value < distance_before < np.inf
    clean = refined.unperturbed
    perturbed = refined.perturbed
    changed = perturbed[perturbed != clean]
    np.testing.assert_allclose(changed, 0.5)
def test_attack_noinit(binarized_bn_model, bn_criterion, bn_images, binarized_bn_labels):
    """Without starting points, the batched refinement produces no adversarials."""
    refinement = BinarizationRefinementAttack(binarized_bn_model, bn_criterion)
    results = refinement(bn_images, binarized_bn_labels, unpack=False)
    for result in results:
        assert result.perturbed is None