Beispiel #1
0
def main(args):
    """Evaluate a pretrained classifier on clean and adversarial test data.

    The function loads the MNIST or CIFAR-10 test split, wraps the model
    returned by ``loadmodel(args)`` in an ART ``PyTorchClassifier``, prints
    the accuracy on the first ``args['n_samples']`` test samples, runs
    ``attackmodel`` on the same samples and saves the returned accuracies
    as a ``.npy`` file under ``./pgd_results/``.

    Args:
        args: Mapping of run options. The keys read here are ``'dataset'``
            (``'mnist'`` selects MNIST, anything else selects CIFAR-10),
            ``'n_samples'`` (number of leading test samples to evaluate)
            and ``'save'`` (suffix of the output file name). The mapping is
            also forwarded unchanged to ``loadmodel`` and ``attackmodel``.
    """
    # Local import: the file-level import block is not visible from here.
    import os

    print('==> Loading data..')
    if args['dataset'] == 'mnist':
        (_, _), (x_test,
                 y_test), min_pixel_value, max_pixel_value = load_mnist()
        input_shape = (1, 28, 28)
    else:
        (_, _), (x_test,
                 y_test), min_pixel_value, max_pixel_value = load_cifar10()
        input_shape = (3, 32, 32)

    # Move the last axis to position 1 so the array layout matches the
    # channel-first ``input_shape`` declared above.
    x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

    print('==> Loading model..')
    model = loadmodel(args)
    model = model.cuda()
    model = model.eval()

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=criterion,
        optimizer=optimizer,
        input_shape=input_shape,
        nb_classes=10,
    )

    # Slice once; the same subset is used for the clean and the adversarial
    # evaluation.
    n_samples = args['n_samples']
    x_eval = x_test[:n_samples]
    y_eval = y_test[:n_samples]

    predictions = classifier.predict(x_eval)
    # Labels are compared via argmax on both sides, i.e. ``y_test`` is
    # treated as one row of class scores per sample.
    n_correct = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_eval, axis=1))
    clean_accuracy = n_correct / len(y_eval)
    print("Accuracy on benign test examples: {}%".format(clean_accuracy * 100))

    print("==> Evaluate the classifier on adversarial test examples")
    queries = [100, 200, 500]
    acc = attackmodel(args, classifier, x_eval, y_eval, queries)

    # np.save does not create missing directories, so make sure the target
    # folder exists before writing the results.
    output_path = "./pgd_results/" + args['dataset'] + args['save']
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    np.save(output_path, np.array(acc))
    print("The adjusted accuracies are:")
    print(acc)
Beispiel #2
0
def _parse_args(data_params):
    """Parse the command-line options; attack names come from ``data_params``."""
    parser = argparse.ArgumentParser()
    # --data is required: every code path indexes
    # data_params['data'][args.data], so omitting it could only end in a
    # KeyError later on.
    parser.add_argument('--data', type=str, required=True)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    parser.add_argument('--eps', type=float, default=0.3)
    # NOTE: In CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    return parser.parse_args()


def _load_datasets(args, data_params):
    """Return the ``(train, test)`` datasets selected by ``args.data``."""
    to_tensor = tv.transforms.Compose([tv.transforms.ToTensor()])

    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=to_tensor)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=to_tensor)
        return dataset_train, dataset_test

    if args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=to_tensor)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=to_tensor)
        return dataset_train, dataset_test

    # Any other name is looked up in data.json and read from a CSV file.
    data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
    print('Read file:', data_path)
    X, y = load_csv(data_path)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=data_params['data'][args.data]['n_test'],
        random_state=args.random_state)
    # The scaler is fitted on the training split only and then applied to
    # both splits.
    scaler = MinMaxScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
    dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32), torch.from_numpy(y_test).type(torch.long))
    return dataset_train, dataset_test


def _build_model(args, data_params, device, use_prob):
    """Instantiate the network for ``args.data``; return ``(model, model_name)``."""
    if args.data == 'mnist':
        return BaseModel(use_prob=use_prob).to(device), 'basic'

    if args.data == 'cifar10':
        # The architecture is taken from the second '_'-separated token of
        # the pretrained file name.
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            return Resnet(use_prob=use_prob).to(device), model_name
        if model_name == 'vgg':
            return Vgg(use_prob=use_prob).to(device), model_name
        raise ValueError('Unknown model: {}'.format(model_name))

    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    model = NumericModel(
        n_features,
        n_hidden=n_features * 4,
        n_classes=n_classes,
        use_prob=use_prob).to(device)
    return model, 'basic' + str(n_features * 4)


def _build_attack(args, classifier, model, n_classes, dataset_train, device):
    """Create the attack object named by ``args.attack``.

    Raises:
        NotImplementedError: for an attack name without a branch below, or
            for the 'line' attack on a dataset other than MNIST/CIFAR-10.
    """
    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        return AutoProjectedGradientDescent(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    if args.attack == 'apgd1':
        return AutoProjectedGradientDescent(
            estimator=classifier,
            norm=1,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    if args.attack == 'apgd2':
        return AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    if args.attack == 'bim':
        return BasicIterativeMethod(
            estimator=classifier,
            eps=args.eps,
            eps_step=args.eps / 10.0,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    if args.attack == 'boundary':
        return BoundaryAttack(
            estimator=classifier,
            max_iter=1000,
            sample_size=args.batch_size,
            targeted=False)
    if args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        return CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=args.eps,
            verbose=True,
            check_prob=False,
            batch_size=args.batch_size,
            targeted=False)
    if args.attack == 'cwinf':
        return CarliniLInfMethod(
            classifier=classifier,
            confidence=args.eps,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    if args.attack == 'deepfool':
        return DeepFool(
            classifier=classifier,
            epsilon=args.eps,
            batch_size=args.batch_size)
    if args.attack == 'fgsm':
        return FastGradientMethod(
            estimator=classifier,
            eps=args.eps,
            batch_size=args.batch_size)
    if args.attack == 'jsma':
        return SaliencyMapMethod(
            classifier=classifier,
            gamma=args.eps,
            batch_size=args.batch_size)
    if args.attack == 'line':
        # One colour value for MNIST, a triple for CIFAR-10.
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        return LineAttack(color=color, thickness=1)
    if args.attack == 'shadow':
        return ShadowAttack(
            estimator=classifier,
            batch_size=args.batch_size,
            targeted=False,
            verbose=False)
    if args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=n_classes,
            x_min=0.0,
            x_max=1.0,
            targeted=False)

        # The watermark attack is fitted on the training samples that the
        # model classifies correctly.
        X_train, y_train = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
        return attack
    raise NotImplementedError


def _run_attack(args, attack, X_benign, y):
    """Generate adversarial examples for ``X_benign`` with ``attack``."""
    # Shadow attack only takes single sample!
    if args.attack == 'shadow':
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
        return adv
    if args.attack == 'watermark':
        # This is untargeted.
        return attack.generate(X_benign, y)
    return attack.generate(x=X_benign)


def main():
    """Generate adversarial examples for a pretrained model and save them.

    Reads ``data.json`` from the working directory, parses the command
    line, loads the selected dataset and the pretrained weights (from
    ``<output_path>/<pretrained>``), keeps only the test samples the model
    classifies correctly, attacks the first ``--n_samples`` of them with
    the attack named by ``--attack`` and writes three ``.npy`` files
    (benign inputs, labels, adversarial inputs) into ``--output_path``.

    Behaviour notes:
        * ``--data`` is now a required option; previously omitting it led
          to a ``KeyError`` when indexing ``data.json``.
        * The output folder is created with ``os.makedirs`` so nested
          paths work.
    """
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    args = _parse_args(data_params)
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.makedirs(args.output_path, exist_ok=True)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    dataset_train, dataset_test = _load_datasets(args, data_params)
    dataloader_train = DataLoader(dataset_train, 256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, 256, shuffle=False)

    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model
    # The listed attacks get a model built with use_prob=False; all other
    # attacks get use_prob=True.
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    model, model_name = _build_model(args, data_params, device, use_prob)

    optimizer = optim.SGD(model.parameters(), lr=0.01,
                          momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples.
    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(
        len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    # ART expects a tuple for input_shape; a bare int is wrapped.
    if isinstance(n_features, int):
        n_features = (n_features,)

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu')

    attack = _build_attack(
        args, classifier, model, n_classes, dataset_train, device)

    # Never ask for more samples than survived the correctness filter.
    n = min(len(dataset_perfect), args.n_samples)

    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()

    print('Creating {} adversarial examples with eps={} (Not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    adv = _run_attack(args, attack, X_benign, y)
    time_elapsed = time.time() - time_start
    print('Total time spend: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print("Accuracy on benign samples: {:.4f}%".format(acc_benign * 100))
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    # Save results
    # The size suffix is only added for runs below 2000 samples, which is
    # the default of --n_samples.
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)

    print('Saved to:', '{}_adv.npy'.format(output_file))
    print()
Beispiel #3
0
# Adam optimizer over all model parameters, used by the ART wrapper below.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier
classifier = PyTorchClassifier(
    model=model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
    clip_values=(0, 1),
)

# Train through the ART wrapper: 5 epochs with batches of 128 samples.
classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Benign accuracy: share of test samples whose arg-max prediction equals
# the arg-max of the corresponding label row.
predictions = classifier.predict(x_test)
accuracy = (
    np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
    / len(y_test)
)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))


def calculate_l0(batch_original, batch_adversarial, dim):
    """Count the positions at which two batches differ.

    Args:
        batch_original: Array of original samples.
        batch_adversarial: Array of perturbed samples, compared element by
            element against ``batch_original``.
        dim: Axis (or axes) forwarded to ``np.count_nonzero``; ``None``
            counts over the whole array.

    Returns:
        The number of differing elements, reduced along ``dim``.
    """
    changed = batch_original != batch_adversarial
    return np.count_nonzero(changed, axis=dim)


def calculate_l2(batch_original, batch_adversarial, dim=None):
    """Return the L2 norm of the difference between two batches.

    Args:
        batch_original: Array of original samples.
        batch_adversarial: Array of perturbed samples with a shape that is
            compatible with ``batch_original``.
        dim: Optional axis or tuple of axes to reduce over. With the
            default ``None`` a single norm over all elements is returned
            (the original behaviour); pass e.g. ``(1, 2, 3)`` to get one
            norm per sample of a 4-D batch.

    Returns:
        A scalar when ``dim`` is ``None``, otherwise an array with the
        axes in ``dim`` reduced away.
    """
    original = np.asarray(batch_original)
    adversarial = np.asarray(batch_adversarial)

    # With two integer inputs the subtraction happens in integer
    # arithmetic; for unsigned types (e.g. uint8 images) 0 - 1 wraps to 255.
    # Promote to float first so the difference is the true signed value.
    both_integral = not (np.issubdtype(original.dtype, np.inexact)
                         or np.issubdtype(adversarial.dtype, np.inexact))
    if both_integral:
        original = original.astype(np.float64)
        adversarial = adversarial.astype(np.float64)

    difference = original - adversarial
    if dim is None:
        return np.linalg.norm(difference)
    # np.linalg.norm only accepts one or two axes; the explicit form works
    # for any number of reduced axes.
    return np.sqrt(np.sum(np.abs(difference) ** 2, axis=dim))
# Reshape the inputs into a batch of single-channel 57x47 images; the batch
# size is inferred from the number of elements.
data = torch.reshape(data, [-1, 1, 57, 47])

print("The size of the input is:")
print(data.shape)

data, target = data.to(device), target.to(device)

# White-Box Classifier
# ART's ``input_shape`` describes ONE input instance, so the leading batch
# dimension of ``data`` is dropped here.
classifier = PyTorchClassifier(model=model,
                               input_shape=tuple(data.shape[1:]),
                               nb_classes=40,
                               loss=nn.CrossEntropyLoss(),
                               device_type="cpu")

# NOTE(review): ``data`` was moved to ``device`` above while the classifier
# is created with device_type="cpu" -- confirm the two agree.
original_predictions = classifier.predict(data)

# Accuracy: share of samples whose arg-max prediction equals the arg-max of
# the corresponding row of ``test_set_y``.
accuracy = np.sum(
    np.argmax(original_predictions, axis=1) == np.argmax(
        test_set_y, axis=1)) / test_set_y.shape[0]
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Generate adversarial test examples
# White-Box Attacks
# FGSM (currently disabled):
# attacker = FastGradientMethod(
#     estimator=classifier,
#     eps=0.1
#     )
Beispiel #5
0
def attack_pgd_targeted(dataloader, model, model_info, args, checkpoint_dir):
    """Run a targeted PGD attack on every batch and save the results.

    Each batch from ``dataloader`` is attacked with ART's
    ``ProjectedGradientDescentPyTorch`` (norm 1, eps 500, step 5,
    100 iterations, no random restarts) towards the class
    ``args.target_class``. Every adversarial sample is written twice into
    ``<checkpoint_dir>/target_<t>``: as a raw ``.npy`` array and as an
    image under the original file name.

    Args:
        dataloader: Iterable yielding ``(sample, label, img_path)`` batches.
        model: Network wrapped in the ART classifier.
        model_info: Mapping with the keys ``"model_img_size"`` and
            ``"num_classes"``.
        args: Options object; ``device``, ``batch_size`` and
            ``target_class`` are read.
        checkpoint_dir: Parent directory of the output folder.

    Returns:
        Path of the folder the adversarial samples were written to.

    Note:
        The ``.npy`` name is now derived by swapping the file extension.
        The former ``str.replace("png", "npy")`` rewrote every "png"
        occurrence in the name and left other extensions untouched.
    """
    device = args.device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    img_size = model_info["model_img_size"]
    n_classes = model_info["num_classes"]

    # NOTE(review): input_shape carries no channel axis although the saved
    # samples are transposed as 3-D arrays below -- confirm this is intended.
    classifier = PyTorchClassifier(
        model=model,
        loss=criterion,
        clip_values=(0.0, 1.0),
        optimizer=optimizer,
        input_shape=(img_size, img_size),
        nb_classes=n_classes,
        device_type=device,
    )
    attack = ProjectedGradientDescentPyTorch(
        estimator=classifier,
        norm=1,
        eps=500,
        eps_step=5,
        max_iter=100,
        targeted=True,
        num_random_init=0,
        batch_size=args.batch_size,
        random_eps=False,
    )

    # Launching a targeted attack
    t = args.target_class
    print(f"Launching attack for target {t}")
    dest_images = os.path.join(checkpoint_dir, f"target_{t}")
    os.makedirs(dest_images, exist_ok=True)

    for sample, label, img_path in tqdm(dataloader):
        # One-hot encode the target class for every sample of the batch.
        target = np.full_like(label, t)
        target_labels = (
            np.arange(n_classes) == target[:, None]).astype("float32")

        sample_adv = attack.generate(x=sample, y=target_labels)

        for path, adv_image in zip(img_path, sample_adv):
            file_name = os.path.basename(path)
            array_name = os.path.splitext(file_name)[0] + ".npy"

            # Raw adversarial array, stored next to the image.
            np.save(os.path.join(dest_images, array_name), adv_image)

            # Move the first axis last before converting to uint8 and
            # writing the image file.
            _img = adv_image.transpose(1, 2, 0)
            skimage.io.imsave(os.path.join(dest_images, file_name),
                              img_as_ubyte(_img))

    return dest_images