def test_base_attack(model, criterion, image, label):
    """Exercise the generic ``Attack`` call interface via FGSM.

    Covers argument validation, the unpack flag, calling with a
    pre-built ``Adversarial``, and attacks created without defaults.
    """
    attack = attacks.FGSM(model, criterion)
    assert attack.name() == 'GradientSignAttack'

    # image without label is rejected
    with pytest.raises(ValueError):
        attack(image=image)
    # label alone is not a valid call signature
    with pytest.raises(TypeError):
        attack(label=label)

    wrong_label = label + 1

    # with the original (correct) label the attack returns None —
    # presumably the criterion is never satisfied; verified by assert only
    adv = attack(image=image, label=label)
    assert adv is None

    adv = attack(image=image, label=wrong_label)
    assert adv.shape == image.shape

    # unpack=False returns the Adversarial wrapper instead of the array
    adv = attack(image=image, label=wrong_label, unpack=False)
    assert adv.image.shape == image.shape

    # an Adversarial instance can be passed directly
    adv = Adversarial(model, criterion, image, wrong_label)
    adv = attack(adv)
    assert adv.shape == image.shape

    # passing an Adversarial together with a label is ambiguous
    adv = Adversarial(model, criterion, image, wrong_label)
    with pytest.raises(ValueError):
        attack(adv, label=wrong_label)

    # an attack without a default model cannot take raw image/label
    attack = attacks.FGSM()
    with pytest.raises(ValueError):
        attack(image=image, label=wrong_label)
def bn_trivial():
    """Yield an ``Adversarial`` built with the trivial criterion,
    reset so the original is not yet considered adversarial."""
    crit = bn_trivial_criterion()
    img = bn_image()
    lbl = bn_label()
    model_ctx = contextmanager(bn_model)
    with model_ctx() as model:
        adv = Adversarial(model, crit, img, lbl)
        # reset the (name-mangled) best-result fields: the original must
        # not count as adversarial, so the attack implementation runs
        adv._Adversarial__best_adversarial = None
        adv._Adversarial__best_distance = MSE(value=np.inf)
        yield adv
def bn_impossible():
    """Yield an ``Adversarial`` whose criterion is impossible to meet."""
    crit = bn_impossible_criterion()
    img = bn_image()
    lbl = bn_label()
    model_ctx = contextmanager(bn_model)
    with model_ctx() as model:
        yield Adversarial(model, crit, img, lbl)
def gl_bn_adversarial():
    """Yield an ``Adversarial`` against the gradient-less model variant."""
    crit = bn_criterion()
    img = bn_image()
    lbl = bn_label()
    model_ctx = contextmanager(gl_bn_model)
    with model_ctx() as model:
        yield Adversarial(model, crit, img, lbl)
def binarized2_bn_adversarial(bn_criterion, bn_image, binarized2_bn_label):
    """Yield an ``Adversarial`` against the binarized2 model variant.

    Criterion, image and label arrive as pytest fixture arguments.
    """
    model_ctx = contextmanager(binarized2_bn_model)
    with model_ctx() as model:
        yield Adversarial(
            model, bn_criterion, bn_image, binarized2_bn_label)
def bn_targeted_adversarial(bn_targeted_criterion, bn_image, bn_label):
    """Yield an ``Adversarial`` driven by the targeted criterion.

    Criterion, image and label arrive as pytest fixture arguments.
    """
    model_ctx = contextmanager(bn_model)
    with model_ctx() as model:
        yield Adversarial(
            model, bn_targeted_criterion, bn_image, bn_label)
def bn_targeted_adversarial_pytorch():
    """Return a fresh targeted ``Adversarial`` for the PyTorch model.

    Sanity-checks that no adversarial image has been found yet.
    NOTE(review): this file defines this name more than once; the last
    definition shadows this one at import time.
    """
    adv = Adversarial(
        bn_model_pytorch(), bn_targeted_criterion(),
        bn_image_pytorch(), bn_label_pytorch())
    assert adv.image is None
    assert adv.distance.value == np.inf
    return adv
def bn_adversarial_mae():
    """Yield an ``Adversarial`` that measures distance with MAE."""
    crit = bn_criterion()
    img = bn_image()
    lbl = bn_label()
    model_ctx = contextmanager(bn_model)
    with model_ctx() as model:
        yield Adversarial(model, crit, img, lbl, distance=MAE)
def bn_adversarial_linf(bn_criterion, bn_image, bn_label):
    """Yield an ``Adversarial`` using the L-infinity distance measure.

    Criterion, image and label arrive as pytest fixture arguments.
    """
    model_ctx = contextmanager(bn_model)
    with model_ctx() as model:
        yield Adversarial(
            model, bn_criterion, bn_image, bn_label, distance=Linfinity)
def eg_bn_adversarial(request):
    """Yield an ``Adversarial`` for an estimated-gradient model variant
    selected by the (presumably parametrized) pytest ``request``."""
    crit = bn_criterion()
    img = bn_image()
    lbl = bn_label()
    model_factory = eg_bn_model_factory(request)
    model_ctx = contextmanager(model_factory)
    with model_ctx() as model:
        yield Adversarial(model, crit, img, lbl)
def bn_targeted_adversarial_pytorch(bn_model_pytorch, bn_targeted_criterion,
                                    bn_image_pytorch, bn_label_pytorch):
    """Return a fresh targeted ``Adversarial`` for the PyTorch model.

    Inputs arrive as pytest fixture arguments; checks that no perturbed
    image has been found yet.
    """
    adv = Adversarial(bn_model_pytorch, bn_targeted_criterion,
                      bn_image_pytorch, bn_label_pytorch)
    assert adv.perturbed is None
    assert adv.distance.value == np.inf
    return adv
def test_adversarial(model, criterion, image, label):
    """Exercise the ``Adversarial`` API end to end.

    Checks initial state with the correct label, then behavior with a
    wrong label: predictions, batched predictions, gradients, metadata,
    and the no-adversarial / no-gradient fallbacks.

    NOTE(review): this file contains multiple ``test_adversarial``
    definitions; only the last one is collected by pytest.
    """
    # model = bn_model
    # criterion = bn_criterion
    # image = bn_image
    # label = bn_label
    # correct label: not adversarial, no adversarial found, distance inf
    adversarial = Adversarial(model, criterion, image, label)
    assert not adversarial.predictions(image)[1]
    assert adversarial.image is None
    assert adversarial.distance == MSE(value=np.inf)
    assert adversarial.original_image is image
    assert adversarial.original_class == label
    assert adversarial.target_class() is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    label = 22  # wrong label
    # wrong label: the original image itself counts as adversarial
    adversarial = Adversarial(model, criterion, image, label)
    assert adversarial.image is not None
    assert adversarial.distance == MSE(value=0)
    assert adversarial.original_image is image
    assert adversarial.original_class == label
    assert adversarial.target_class() is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    predictions, is_adversarial = adversarial.predictions(image)
    first_predictions = predictions
    assert is_adversarial

    # batched predictions must match the single-image predictions
    predictions, is_adversarial = adversarial.batch_predictions(
        image[np.newaxis])  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert np.all(is_adversarial == np.array([True]))

    predictions, is_adversarial, index = adversarial.batch_predictions(
        image[np.newaxis], increasing=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, gradient, is_adversarial = adversarial.predictions_and_gradient(
        image, label)  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    # defaults to the original image / original class
    predictions, gradient, is_adversarial = adversarial.predictions_and_gradient(
        )  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    gradient = adversarial.gradient()
    assert gradient.shape == image.shape
    assert is_adversarial

    assert adversarial.num_classes() == 1000
    assert adversarial.has_gradient()
    assert adversarial.channel_axis(batch=True) == 3
    assert adversarial.channel_axis(batch=False) == 2

    # without adversarials
    criterion.is_adversarial = Mock(return_value=False)
    adversarial = Adversarial(model, criterion, image, label)
    predictions, is_adversarial, index = adversarial.batch_predictions(
        image[np.newaxis], increasing=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert not is_adversarial
    assert index is None

    # without gradient
    del model.predictions_and_gradient
    assert not adversarial.has_gradient()
def test_adversarial(model, criterion, image, label):
    """Exercise the ``Adversarial`` API end to end (extended version).

    Adds distance-dtype switching, ``return_details`` variants,
    ``greedy`` batch predictions and ``backward`` to the basic checks.

    NOTE(review): this file contains multiple ``test_adversarial``
    definitions; only the last one is collected by pytest.
    """
    # model = bn_model
    # criterion = bn_criterion
    # image = bn_image
    # label = bn_label
    # correct label: not adversarial, no adversarial found, distance inf
    adversarial = Adversarial(model, criterion, image, label, verbose=False)
    assert not adversarial.predictions(image)[1]
    assert adversarial.image is None
    assert adversarial.distance == MSE(value=np.inf)
    assert adversarial.original_image is image
    assert adversarial.original_class == label
    assert adversarial.target_class() is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    # distance-dtype handling: switching to float64 changes the value,
    # resetting restores the original float32 result
    np.random.seed(22)
    perturbation = np.random.uniform(-1, 1, size=image.shape)
    perturbed = np.clip(image + perturbation, 0, 255).astype(np.float32)
    d1 = adversarial.normalized_distance(perturbed).value
    assert d1 != 0
    assert adversarial.original_image.dtype == np.float32
    adversarial.set_distance_dtype(np.float32)
    assert adversarial.normalized_distance(perturbed).value == d1
    adversarial.set_distance_dtype(np.float64)
    assert adversarial.normalized_distance(perturbed).value != d1
    adversarial.reset_distance_dtype()
    assert adversarial.normalized_distance(perturbed).value == d1

    label = 22  # wrong label
    # wrong label: the original image itself counts as adversarial
    adversarial = Adversarial(model, criterion, image, label, verbose=True)
    assert adversarial.image is not None
    assert adversarial.distance == MSE(value=0)
    assert adversarial.original_image is image
    assert adversarial.original_class == label
    assert adversarial.target_class() is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    predictions, is_adversarial = adversarial.predictions(image)
    first_predictions = predictions
    assert is_adversarial

    predictions, is_adversarial, _, _ = adversarial.predictions(image, return_details=True)  # noqa: E501
    first_predictions = predictions
    assert is_adversarial

    predictions, is_adversarial = adversarial.batch_predictions(image[np.newaxis])  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert np.all(is_adversarial == np.array([True]))

    predictions, is_adversarial, index = adversarial.batch_predictions(image[np.newaxis], greedy=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, is_adversarial, index, _, _ = adversarial.batch_predictions(image[np.newaxis], greedy=True, return_details=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, gradient, is_adversarial = adversarial.predictions_and_gradient(image, label)  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    predictions, gradient, is_adversarial, _, _ = adversarial.predictions_and_gradient(image, label, return_details=True)  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    # defaults to the original image / original class
    predictions, gradient, is_adversarial = adversarial.predictions_and_gradient()  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    # backward pass: image argument defaults to the original image
    gradient_pre = np.ones_like(predictions) * 0.3
    gradient = adversarial.backward(gradient_pre, image)
    gradient2 = adversarial.backward(gradient_pre)
    assert gradient.shape == image.shape
    assert (gradient == gradient2).all()

    gradient = adversarial.gradient()
    assert gradient.shape == image.shape
    assert is_adversarial

    assert adversarial.num_classes() == 1000
    assert adversarial.has_gradient()
    assert adversarial.channel_axis(batch=True) == 3
    assert adversarial.channel_axis(batch=False) == 2

    # without adversarials
    criterion.is_adversarial = Mock(return_value=False)
    adversarial = Adversarial(model, criterion, image, label)
    predictions, is_adversarial, index = adversarial.batch_predictions(image[np.newaxis], greedy=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert not is_adversarial
    assert index is None

    # without gradient
    del model.predictions_and_gradient
    assert not adversarial.has_gradient()
def test_adversarial(model, criterion, image, label):
    """Exercise the ``Adversarial`` API end to end (extended version).

    Adds distance-dtype switching, ``return_details`` variants,
    ``greedy`` batch predictions and ``backward`` to the basic checks.

    NOTE(review): this file contains multiple ``test_adversarial``
    definitions; only the last one is collected by pytest.
    """
    # model = bn_model
    # criterion = bn_criterion
    # image = bn_image
    # label = bn_label
    # correct label: not adversarial, no adversarial found, distance inf
    adversarial = Adversarial(model, criterion, image, label, verbose=False)
    assert not adversarial.predictions(image)[1]
    assert adversarial.image is None
    assert adversarial.distance == MSE(value=np.inf)
    assert adversarial.original_image is image
    assert adversarial.original_class == label
    assert adversarial.target_class() is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    # distance-dtype handling: switching to float64 changes the value,
    # resetting restores the original float32 result
    np.random.seed(22)
    perturbation = np.random.uniform(-1, 1, size=image.shape)
    perturbed = np.clip(image + perturbation, 0, 255).astype(np.float32)
    d1 = adversarial.normalized_distance(perturbed).value
    assert d1 != 0
    assert adversarial.original_image.dtype == np.float32
    adversarial.set_distance_dtype(np.float32)
    assert adversarial.normalized_distance(perturbed).value == d1
    adversarial.set_distance_dtype(np.float64)
    assert adversarial.normalized_distance(perturbed).value != d1
    adversarial.reset_distance_dtype()
    assert adversarial.normalized_distance(perturbed).value == d1

    label = 22  # wrong label
    # wrong label: the original image itself counts as adversarial
    adversarial = Adversarial(model, criterion, image, label, verbose=True)
    assert adversarial.image is not None
    assert adversarial.distance == MSE(value=0)
    assert adversarial.original_image is image
    assert adversarial.original_class == label
    assert adversarial.target_class() is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    predictions, is_adversarial = adversarial.predictions(image)
    first_predictions = predictions
    assert is_adversarial

    predictions, is_adversarial, _, _ = adversarial.predictions(
        image, return_details=True)  # noqa: E501
    first_predictions = predictions
    assert is_adversarial

    predictions, is_adversarial = adversarial.batch_predictions(
        image[np.newaxis])  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert np.all(is_adversarial == np.array([True]))

    predictions, is_adversarial, index = adversarial.batch_predictions(
        image[np.newaxis], greedy=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, is_adversarial, index, _, _ = adversarial.batch_predictions(
        image[np.newaxis], greedy=True, return_details=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, gradient, is_adversarial = adversarial.predictions_and_gradient(
        image, label)  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    predictions, gradient, is_adversarial, _, _ = adversarial.predictions_and_gradient(
        image, label, return_details=True)  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    # defaults to the original image / original class
    predictions, gradient, is_adversarial = adversarial.predictions_and_gradient(
        )  # noqa: E501
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    # backward pass: image argument defaults to the original image
    gradient_pre = np.ones_like(predictions) * 0.3
    gradient = adversarial.backward(gradient_pre, image)
    gradient2 = adversarial.backward(gradient_pre)
    assert gradient.shape == image.shape
    assert (gradient == gradient2).all()

    gradient = adversarial.gradient()
    assert gradient.shape == image.shape
    assert is_adversarial

    assert adversarial.num_classes() == 1000
    assert adversarial.has_gradient()
    assert adversarial.channel_axis(batch=True) == 3
    assert adversarial.channel_axis(batch=False) == 2

    # without adversarials
    criterion.is_adversarial = Mock(return_value=False)
    adversarial = Adversarial(model, criterion, image, label)
    predictions, is_adversarial, index = adversarial.batch_predictions(
        image[np.newaxis], greedy=True)  # noqa: E501
    assert (predictions == first_predictions[np.newaxis]).all()
    assert not is_adversarial
    assert index is None

    # without gradient
    del model.predictions_and_gradient
    assert not adversarial.has_gradient()
def test_early_stopping(bn_model, bn_criterion, bn_image, bn_label):
    """Check that a distance ``threshold`` stops an attack early.

    Compares prediction-call counts and distances with and without
    reachable thresholds; fixture inputs via pytest.
    """
    attack = attacks.FGSM()
    model = bn_model
    criterion = bn_criterion
    image = bn_image
    label = bn_label

    wrong_label = label + 1
    # wrong label: original image already adversarial, distance 0
    adv = Adversarial(model, criterion, image, wrong_label)
    attack(adv)
    assert adv.distance.value == 0
    assert not adv.reached_threshold()  # because no threshold specified

    adv = Adversarial(model, criterion, image, wrong_label, threshold=1e10)
    attack(adv)
    assert adv.distance.value == 0
    assert adv.reached_threshold()

    # correct label: attack must actually perturb
    adv = Adversarial(model, criterion, image, label)
    attack(adv)
    assert adv.distance.value > 0
    assert not adv.reached_threshold()  # because no threshold specified

    # baseline call count and distance for the comparisons below
    c = adv._total_prediction_calls
    d = adv.distance.value
    large_d = 10 * d
    small_d = d / 2

    # threshold given as a Distance instance
    adv = Adversarial(model, criterion, image, label,
                      threshold=adv._distance(value=large_d))
    attack(adv)
    assert 0 < adv.distance.value <= large_d
    assert adv.reached_threshold()
    assert adv._total_prediction_calls < c  # stopped early

    # threshold given as a plain number
    adv = Adversarial(model, criterion, image, label, threshold=large_d)
    attack(adv)
    assert 0 < adv.distance.value <= large_d
    assert adv.reached_threshold()
    assert adv._total_prediction_calls < c

    # unreachably small threshold: attack runs to completion
    adv = Adversarial(model, criterion, image, label, threshold=small_d)
    attack(adv)
    assert small_d < adv.distance.value <= large_d
    assert not adv.reached_threshold()
    assert adv._total_prediction_calls == c
    assert adv.distance.value == d

    adv = Adversarial(model, criterion, image, label,
                      threshold=adv._distance(value=large_d))
    attack(adv)
    assert adv.reached_threshold()
    c = adv._total_prediction_calls
    # re-running on an Adversarial that already reached its threshold
    # must not trigger further predictions
    attack(adv)
    assert adv._total_prediction_calls == c  # no new calls
def bn_targeted_adversarial_pytorch():
    """Build and return a targeted ``Adversarial`` for the PyTorch model.

    NOTE(review): this file defines this name more than once; only the
    last definition is visible at import time. This variant mixes
    ``bn_image_pytorch()`` with the plain ``bn_label()`` — confirm that
    pairing is intended.
    """
    return Adversarial(bn_model_pytorch(), bn_targeted_criterion(),
                       bn_image_pytorch(), bn_label())
# FOR TESTING OF SIMPLIFIED ALGORITHM source_label = labels[idx] #adversarial_label = int(np.argmax(fmodel.predictions(adversarial))) #adversarial = torch.from_numpy(adversarial) ##adversarial = attack(image.numpy(), source_label.numpy()) ##adversarial = torch.from_numpy(adversarial) #''' # get Goodfellow adversarial amodel = attack._default_model acriterion = attack._default_criterion adistance = attack._default_distance athreshold = attack._default_threshold adv_obj = Adversarial(amodel, acriterion, image.numpy(), source_label.numpy(), distance=adistance, threshold=athreshold) signed_gradient = attack._gradient(adv_obj) adversarial = image.numpy() + signed_gradient * 0.01 adversarial = torch.from_numpy(adversarial) #adversarial_label = int(np.argmax(fmodel.predictions(adversarial))) #''' true_boundary_images[idx] = adversarial index = torch.tensor([source_label]) true_boundary_labels[idx] = torch.eye(num_classes)[index] #true_boundary_images[idx] = torch.from_numpy(adversarial) #boundary_label_values = torch.tensor([0.5, 0.5]) #index = torch.tensor([adversarial_label, source_label])