# Shared dependencies for these tests. The foolbox-provided names used below
# (PyTorchModel, the attack classes, flatten, misclassification, accuracy,
# evaluate_l2) are assumed to come from the foolbox version these tests were
# written against; their exact module paths depend on that version.
import numpy as np
import torch
import torch.nn as nn
import eagerpy as ep


def test_inversion_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

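    # Toy "classifier": global average pooling over height and width yields a
    # vector of per-channel means, so the predicted class is simply the
    # channel with the largest mean value.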
    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds,
                          size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

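    # the inversion attack creates the "negative image" within the bounds,
    # i.e. x -> 1 - x for bounds (0, 1)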
    attack = InversionAttack(fmodel)
    advs = attack(x, y)

    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert (y_advs == y).float().mean() < 1
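# Attack and loss are presumably injected via pytest.mark.parametrize in the
# original test module (e.g. different DeepFool variants and loss functions).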
def test_deepfool(Attack, loss):
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = Attack(fmodel)
    advs = attack(x, y, loss=loss)

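    # per-sample L2 norm of the perturbations; 40 is a loose sanity bound,
    # below the largest possible L2 distance in [0, 1]^(3*32*32) (~55.4)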
    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).square().sum(axis=-1).sqrt()
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert norms.max().item() <= 40.0 + 1e-7
    assert (y_advs == y).float().mean() < 1
def test_gaussian_blur_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            # instead of our usual model that's robust to the BlurAttack,
            # we use a slightly different model that can be attacked
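            # finite differences along height and width act like an edge
            # detector, and Gaussian blurring smooths edges away, so the
            # logits are sensitive to the blur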
            x = x[:, :, 1:, :] - x[:, :, :-1, :]
            x = x[:, :, :, 1:] - x[:, :, :, :-1]
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = GaussianBlurAttack(fmodel, channel_axis=1)
    advs = attack(x, y)

    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).square().sum(axis=-1).sqrt()
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert norms.max().item() <= 20.0 + 1e-7
    assert (y_advs == y).float().mean() < 1
def test_linf_fast_gradient_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = LinfinityFastGradientAttack(fmodel)
    advs = attack(x, y, rescale=False, epsilon=0.3)

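    # the fast gradient step must stay within an L-infinity ball of radius
    # epsilon=0.3 around the original inputs (checked below)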
    perturbations = ep.astensor(advs - x)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert perturbations.abs().max() <= 0.3 + 1e-7
    assert (y_advs == y).float().mean() < 1
def test_l1_brendel_bethge_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds,
                          size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = L1BrendelBethgeAttack(fmodel)
    advs = attack(x, y, steps=100, lr_num_decay=10)

    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).abs().sum(axis=-1)
    y_advs = fmodel.forward(advs).argmax(axis=-1)

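    # 32 * 32 * 3 = 3072 is the largest possible L1 distance for inputs in
    # [0, 1]; the attack is expected to use at most half of that budget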
    assert x.shape == advs.shape
    assert norms.max().item() <= 32 * 32 * 3 / 2
    assert (y_advs == y).float().mean() < 1e-5
def test_saltandpepper_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds,
                          size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

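    # salt-and-pepper noise sets individual pixels to the bounds' min/max;
    # every sample must end up misclassified (see the assert below)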
    attack = SaltAndPepperNoiseAttack(fmodel, channel_axis=1)
    advs = attack(x, y, criterion=misclassification)

    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert (y_advs == y).float().mean() == 0.0
def test_repeated_additive_noise_attack(Attack):
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds, size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

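    # with a generous noise budget (epsilon=20.0), more than half of the
    # predictions should change on this weak model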
    attack = Attack(fmodel)
    advs = attack(x, y, epsilon=20.0, criterion=misclassification)

    y_advs = fmodel.forward(advs).argmax(axis=-1)

    assert x.shape == advs.shape
    assert (y_advs == y).float().mean() < 0.5
def test_binary_search_contrast_reduction_attack():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

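    # this model binarizes its input at 0.5 before pooling, the structure that
    # BinarizationRefinementAttack (used below) is designed to exploit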
    class Model(nn.Module):
        def forward(self, x):
            x = x.clone()
            x[x >= 0.5] = 1.0
            x[x < 0.5] = 0.0
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds,
                          size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attack = BinarySearchContrastReductionAttack(fmodel)
    advs = attack(x, y)

    perturbations = ep.astensor(advs - x)
    norms = flatten(perturbations).square().sum(axis=-1).sqrt()
    y_advs = fmodel.forward(advs).argmax(axis=-1)
    assert (y_advs == y).float().mean() < 1

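    # refine the adversarials found above: exploiting the 0.5 threshold should
    # shrink the perturbation norms without changing any prediction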
    attack2 = BinarizationRefinementAttack(fmodel)
    advs2 = attack2(x, y, adversarials=advs, criterion=misclassification)

    perturbations2 = ep.astensor(advs2 - x)
    norms2 = flatten(perturbations2).square().sum(axis=-1).sqrt()
    y_advs2 = fmodel.forward(advs2).argmax(axis=-1)
    assert (y_advs == y_advs2).all()
    assert (norms2 <= norms).all()
    assert (norms2 < norms).any()
def test_ead_attack_cw():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds,
                          size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

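    # with regularization=0 the elastic-net (EAD) attack has no L1 term and
    # behaves like a pure L2 attack, so its result should roughly match the
    # Carlini & Wagner L2 attack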
    attack = EADAttack(fmodel)
    cw_attack = L2CarliniWagnerAttack(fmodel)
    advs = attack(x,
                  y,
                  regularization=0,
                  binary_search_steps=5,
                  max_iterations=1000)
    advs_cw = cw_attack(x, y, binary_search_steps=5, max_iterations=1000)

    perturbations = ep.astensor(advs - x)
    perturbations_cw = ep.astensor(advs_cw - x)
    y_advs = fmodel.forward(advs).argmax(axis=-1)
    y_advs_cw = fmodel.forward(advs_cw).argmax(axis=-1)

    diff = flatten(perturbations -
                   perturbations_cw).square().sum(axis=-1).sqrt()

    assert x.shape == advs.shape
    assert diff.max().item() <= 40.0 + 1e-7
    assert (y_advs == y_advs_cw).float().mean() == 1
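# presumably used as a pytest fixture in the original test module
# (the @pytest.fixture decorator is not shown in this excerpt)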
def fmodel():
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)
    return fmodel
def test_evaluate():
    channels = 3
    batch_size = 8
    h = w = 32
    bounds = (0, 1)

    class Model(nn.Module):
        def forward(self, x):
            x = torch.mean(x, 3)
            x = torch.mean(x, 2)
            return x

    model = Model().eval()
    fmodel = PyTorchModel(model, bounds=bounds)

    np.random.seed(0)
    x = np.random.uniform(*bounds,
                          size=(batch_size, channels, h, w)).astype(np.float32)
    x = torch.from_numpy(x).to(fmodel.device)
    y = fmodel.forward(x).argmax(axis=-1)

    attacks = [
        L2BasicIterativeAttack,
        L2CarliniWagnerAttack,
        L2ContrastReductionAttack,
        BinarySearchContrastReductionAttack,
        LinearSearchContrastReductionAttack,
    ]
    epsilons = [0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0]

    acc = accuracy(fmodel, x, y)
    assert acc > 0
    _, robust_accuracy = evaluate_l2(fmodel,
                                     x,
                                     y,
                                     attacks=attacks,
                                     epsilons=epsilons)

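    # epsilon=0 allows no perturbation, so robust accuracy equals the clean
    # accuracy; epsilon=128 exceeds the largest possible L2 distance for
    # inputs in [0, 1] (sqrt(3072) ~ 55.4), so robust accuracy drops to zero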
    assert robust_accuracy[0] == acc
    assert robust_accuracy[-1] == 0.0