import numpy as np
import pytest
import torch
import torch.nn as nn
import eagerpy as ep

# NOTE: the foolbox import paths and some attack class names below are
# assumptions inferred from the names used in these tests; adjust them to
# match the installed foolbox version.
from foolbox.models import PyTorchModel
from foolbox.attacks import (
    InversionAttack,
    L2DeepFoolAttack,
    LinfDeepFoolAttack,
    GaussianBlurAttack,
    LinfinityFastGradientAttack,
    L1BrendelBethgeAttack,
    SaltAndPepperNoiseAttack,
    L2RepeatedAdditiveGaussianNoiseAttack,
    L2RepeatedAdditiveUniformNoiseAttack,
    BinarySearchContrastReductionAttack,
    LinearSearchContrastReductionAttack,
    BinarizationRefinementAttack,
    EADAttack,
    L2CarliniWagnerAttack,
    L2BasicIterativeAttack,
    L2ContrastReductionAttack,
)
from foolbox.criteria import misclassification
from foolbox.devutils import flatten
from foolbox.utils import accuracy, evaluate_l2


def test_inversion_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = InversionAttack(fmodel)
    advs = attack(x, y)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert (y_advs == y).float().mean() < 1


# The parametrization is an assumption: DeepFool variants and the loss
# names ("logits", "crossentropy") as accepted by this foolbox version.
@pytest.mark.parametrize("Attack", [L2DeepFoolAttack, LinfDeepFoolAttack])
@pytest.mark.parametrize("loss", ["logits", "crossentropy"])
def test_deepfool(Attack, loss):
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = Attack(fmodel)
    advs = attack(x, y, loss=loss)
    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).square().sum(axis=-1).sqrt()
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert norms.max().item() <= 40.0 + 1e-7
    assert (y_advs == y).float().mean() < 1


def test_gaussian_blur_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            # instead of our usual model that's robust to the BlurAttack,
            # we use a slightly different model that can be attacked
            x = x[:, :, 1:, :] - x[:, :, :-1, :]
            x = x[:, :, :, 1:] - x[:, :, :, :-1]
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = GaussianBlurAttack(fmodel, channel_axis=1)
    advs = attack(x, y)
    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).square().sum(axis=-1).sqrt()
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert norms.max().item() <= 20.0 + 1e-7
    assert (y_advs == y).float().mean() < 1


def test_linf_fast_gradient_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = LinfinityFastGradientAttack(fmodel)
    advs = attack(x, y, rescale=False, epsilon=0.3)
    perturbations = ep.astensor(advs - x)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert perturbations.abs().max() <= 0.3 + 1e-7
    assert (y_advs == y).float().mean() < 1


def test_l1_brendel_bethge_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = L1BrendelBethgeAttack(fmodel)
    advs = attack(x, y, steps=100, lr_num_decay=10)
    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).abs().sum(axis=-1)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert norms.max().item() <= 32 * 32 * 3 / 2
    assert (y_advs == y).float().mean() < 1e-5


def test_saltandpepper_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = SaltAndPepperNoiseAttack(fmodel, channel_axis=1)
    advs = attack(x, y, criterion=misclassification)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert (y_advs == y).float().mean() == 0.0


# The parametrization is an assumption: repeated additive noise attack
# variants as named in this foolbox version.
@pytest.mark.parametrize(
    "Attack",
    [L2RepeatedAdditiveGaussianNoiseAttack, L2RepeatedAdditiveUniformNoiseAttack],
)
def test_repeated_additive_noise_attack(Attack):
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = Attack(fmodel)
    advs = attack(x, y, epsilon=20.0, criterion=misclassification)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert (y_advs == y).float().mean() < 0.5


def test_binary_search_contrast_reduction_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            # binarize the input so that BinarizationRefinementAttack
            # has a threshold to exploit
            x = x.clone()
            x[x >= 0.5] = 1.0
            x[x < 0.5] = 0.0
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = BinarySearchContrastReductionAttack(fmodel)
    advs = attack(x, y)
    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).square().sum(axis=-1).sqrt()
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert (y_advs == y).float().mean() < 1

    # refining the adversarials must preserve their labels while reducing
    # the perturbation norms (strictly, for at least one sample)
    attack2 = BinarizationRefinementAttack(fmodel)
    advs2 = attack2(x, y, adversarials=advs, criterion=misclassification)
    perturbations2 = ep.astensor(advs2 - x)
    norms2 = flatten(perturbations2).square().sum(axis=-1).sqrt()
    y_advs2 = fmodel.forward(advs2).argmax(axis=-1)

    assert (y_advs == y_advs2).all()
    assert (norms2 <= norms).all()
    assert (norms2 < norms).any()


def test_ead_attack_cw():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    # with regularization=0, EAD should behave like Carlini-Wagner
    attack = EADAttack(fmodel)
    cw_attack = L2CarliniWagnerAttack(fmodel)
    advs = attack(x, y, regularization=0, binary_search_steps=5, max_iterations=1000)
    advs_cw = cw_attack(x, y, binary_search_steps=5, max_iterations=1000)
    perturbations = ep.astensor(advs - x)
    perturbations_cw = ep.astensor(advs_cw - x)
    y_advs = fmodel.forward(advs).argmax(axis=-1)
    y_advs_cw = fmodel.forward(advs_cw).argmax(axis=-1)
    diff = flatten(perturbations - perturbations_cw).square().sum(axis=-1).sqrt()

    assert x.shape == advs.shape
    assert diff.max().item() <= 40.0 + 1e-7
    assert (y_advs == y_advs_cw).float().mean() == 1


# presumably a shared pytest fixture; the @pytest.fixture decorator is an
# assumption, since the function is never called directly in this file
@pytest.fixture
def fmodel():
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)
    return fmodel


def test_evaluate():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(
        np.float32
    )
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attacks = [
        L2BasicIterativeAttack,
        L2CarliniWagnerAttack,
        L2ContrastReductionAttack,
        BinarySearchContrastReductionAttack,
        LinearSearchContrastReductionAttack,
    ]
    epsilons = [0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0]

    acc = accuracy(fmodel, x, y)
    assert acc > 0
    _, robust_accuracy = evaluate_l2(fmodel, x, y, attacks=attacks, epsilons=epsilons)
    assert robust_accuracy[0] == acc
    assert robust_accuracy[-1] == 0.0
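

# A minimal convenience sketch, assuming direct execution is useful as a
# quick smoke check; pytest is the usual way to collect and run this suite,
# and only the unparametrized tests are callable without arguments here.
if __name__ == "__main__":
    for test in [
        test_inversion_attack,
        test_gaussian_blur_attack,
        test_linf_fast_gradient_attack,
        test_l1_brendel_bethge_attack,
        test_saltandpepper_attack,
        test_binary_search_contrast_reduction_attack,
        test_ead_attack_cw,
        test_evaluate,
    ]:
        test()
        print(f"{test.__name__} passed")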