l1, 0)
        elif args.experiment_type == "VGG":
            model = convolutional.vgg_model_wide(args.dataset, 0, l1, 0)
        elif args.experiment_type == "leNet":
            model = convolutional.leNet_model_wide(0, l1, 0)
        else:
            raise Exception("Invalid model!")

        model.fit(x_train, y_train, epochs=50, batch_size=128)
        preds = np.argmax(model.predict(x_test), axis=1)

        kmodel = KerasModel(model=model, bounds=(min_, max_))

        attack = None
        if args.attack_type == 'l2':
            attack = CarliniWagnerL2Attack(kmodel, TargetClass(7))
        elif args.attack_type == 'linf':
            attack = RandomPGD(kmodel, TargetClass(7))

        x_sample = np.take(x_test, ones, axis=0)

        # We exclude by default those examples which are not predicted by the classifier as 1s.
        true_ones = np.where(preds == 1)[0]

        x_sample = np.take(x_sample, true_ones, axis=0)
        y_sample = np.array([to_one_hot(1) for _ in x_sample])

        adversarial = None
        if args.attack_type == 'l2':
            adversarial = attack(x_sample,
                                 np.argmax(y_sample, axis=1),
Ejemplo n.º 2
0
# Result accumulators for the dropout sweep below. Nothing in the visible part
# of the loop appends to them; presumably the remainder of the loop body
# (not shown here) fills them once per dropout rate.
failure_rate = []

linear_mmd = []

acc = []

# For every dropout rate in `dr`: build a model, train it, and run the
# Carlini-Wagner L2 attack against it.
for dropout in dr:
    # The same rate is passed as the first two positional arguments; what each
    # argument controls is defined by convolutional.mini_VGG_foolbox.
    kmodel = convolutional.mini_VGG_foolbox(dropout, dropout, 0, "cifar10")

    # Single-epoch variant kept for reference (presumably for quick test runs):
    # kmodel.fit(x_train, y_train, epochs=1, batch_size=128)
    kmodel.fit(x_train, y_train, epochs=100, batch_size=128)

    # Index of the largest prediction along axis 1 for every test example.
    preds = np.argmax(kmodel.predict(x_test), axis=1)

    # Attack configured with the Misclassification criterion.
    attack = CarliniWagnerL2Attack(kmodel, Misclassification())

    # 10-example variant kept for reference (presumably for quick test runs):
    # x_sample = x_test[:10]
    # y_sample = y_test[:10]
    # Only the first 1000 test examples are attacked.
    x_sample = x_test[:1000]
    y_sample = y_test[:1000]

    # Labels are handed to the attack as argmax indices over axis 1
    # (assumes y_test is one-hot encoded - TODO confirm).
    adversarial = attack(x_sample,
                         np.argmax(y_sample, axis=1),
                         binary_search_steps=5,
                         max_iterations=600)

    # Samples for which the L2 attack does not produce an adversarial example
    # within the attack parameters are excluded from the perturbation
    # evaluation.

    # Counter for such unsuccessful attacks; the code that updates it is not
    # part of the visible excerpt.
    failed = 0
def _prepare_output_dir(output_path, overwrite):
    """Create an empty output directory with an ``images`` subfolder.

    Args:
        output_path: Directory that will receive the result files.
        overwrite: If true, an existing directory is wiped and recreated.
            If false, an existing directory ends the program.
    """
    # Function-scope import: only this helper needs shutil.
    import shutil

    if os.path.isdir(output_path):
        if not overwrite:
            print(f'Directory {output_path} exists. Exiting...')
            sys.exit()
        # os.remove() neither expands '*' nor deletes directories, so stale
        # results are dropped by removing the whole tree instead.
        shutil.rmtree(output_path)
    # Creates output_path itself as well as the images subfolder.
    os.makedirs(os.path.join(output_path, 'images'))


def _build_attack(name, adaptive, fmodel):
    """Instantiate the attack selected on the command line.

    Args:
        name: ``'CWL2'`` or ``'EAD'``.
        adaptive: Selects the ``Adaptive*`` variant of the attack when true.
        fmodel: Wrapped model handed to the attack constructor.

    Raises:
        NotImplementedError: For any other attack name.
    """
    if name == 'CWL2':
        attack_cls = (AdaptiveCarliniWagnerL2Attack
                      if adaptive else CarliniWagnerL2Attack)
    elif name == 'EAD':
        attack_cls = AdaptiveEADAttack if adaptive else EADAttack
    else:
        raise NotImplementedError(f'Unsupported attack: {name}')
    return attack_cls(model=fmodel)


def _to_uint8_image(image):
    """Convert a channel-first float image in [0, 1] to a uint8 HWC array.

    PIL expects the channel axis last, whereas the model (see the (3, 1, 1)
    preprocessing statistics in ``main``) works on channel-first images.
    """
    # Clip before casting: out-of-range values would wrap around in uint8.
    pixels = np.clip(255.0 * image, 0.0, 255.0).astype(np.uint8)
    return np.transpose(pixels, (1, 2, 0))


def main():
    """Attack a trained model and write the adversarial results to disk.

    Parses the command line with the module-level ``parser``, loads the
    checkpoint ``<model_dir>/model/model.tar``, runs the selected attack batch
    by batch over the chosen dataset split and writes ``labels.txt``,
    ``adv_labels.txt``, ``logits.txt``, ``distances.txt``, ``results.txt`` and
    one PNG per adversarial example into ``args.output_path``.

    Raises:
        NotImplementedError: If ``args.attack`` is not ``'CWL2'`` or ``'EAD'``.
    """
    args = parser.parse_args()

    # Keep a running log of every invocation of this script.
    os.makedirs('CMDs', exist_ok=True)
    with open('CMDs/construct_adversarial_attack.cmd', 'a') as f:
        f.write(' '.join(sys.argv) + '\n')
        f.write('--------------------------------\n')

    _prepare_output_dir(args.output_path, args.overwrite)

    # Check that we are using a sensible GPU
    device = select_gpu(args.gpu)

    # Load up the model
    model_dir = Path(args.model_dir)
    ckpt = torch.load(os.path.join(model_dir, 'model/model.tar'),
                      map_location=device)
    model = ModelFactory.model_from_checkpoint(ckpt)
    model.to(device)
    model.eval()

    # Wrap model with a Foolbox wrapper. The statistics are shaped (3, 1, 1)
    # so that they broadcast over channel-first images.
    mean = np.array([0.4914, 0.4823, 0.4465]).reshape((3, 1, 1))
    std = np.array([0.247, 0.243, 0.261]).reshape((3, 1, 1))

    fmodel = PyTorchModel(model,
                          bounds=(0, 1),
                          num_classes=ckpt['num_classes'],
                          preprocessing=(mean, std))

    # Load the evaluation data; only the transform mode and the split differ
    # between the training and the test configuration.
    mode, split = ('train', 'train') if args.train else ('eval', 'test')
    dataset = DATASET_DICT[args.dataset](root=args.data_path,
                                         transform=construct_transforms(
                                             n_in=ckpt['n_in'], mode=mode),
                                         target_transform=None,
                                         download=True,
                                         split=split)

    loader = DataLoader(dataset, batch_size=args.batch_size, num_workers=1)

    # Construct adversarial attack
    attack = _build_attack(args.attack, args.adaptive, fmodel)

    adversarials = []
    n_batches = len(loader)
    for i, (images, labels) in enumerate(loader, start=1):
        start = time.time()
        adversarials.extend(
            attack(inputs=images.numpy(), labels=labels.numpy(),
                   unpack=False))
        print(
            f"Batch {i}/{n_batches} took {np.round((time.time() - start) / 60.0, 1)} minutes."
        )

    # NOTE(review): in foolbox 2.x an attack that fails leaves `perturbed`,
    # `output` and `adversarial_class` as None, which the stacking below does
    # not handle - confirm whether failures can occur with these attacks.
    adv_labels = np.stack(
        [adversarial.adversarial_class for adversarial in adversarials],
        axis=0)
    labels = np.stack(
        [adversarial.original_class for adversarial in adversarials], axis=0)
    # foolbox 2.x appears to report the distance as an object carrying the
    # number in `.value` (TODO confirm); plain numbers pass through unchanged.
    distances = np.stack([
        getattr(adversarial.distance, 'value', adversarial.distance)
        for adversarial in adversarials
    ], axis=0)
    logits = np.stack([adversarial.output for adversarial in adversarials],
                      axis=0)

    # np.savetxt takes no `dtype` argument; the textual representation is
    # selected through `fmt` (integers for labels, default floats otherwise).
    np.savetxt(os.path.join(args.output_path, 'labels.txt'), labels, fmt='%d')
    np.savetxt(os.path.join(args.output_path, 'adv_labels.txt'),
               adv_labels,
               fmt='%d')
    np.savetxt(os.path.join(args.output_path, 'logits.txt'), logits)
    np.savetxt(os.path.join(args.output_path, 'distances.txt'), distances)

    accuracy = np.mean(np.asarray(labels == adv_labels, dtype=np.float32))
    sr = np.mean(np.asarray(labels != adv_labels, dtype=np.float32))
    with open(os.path.join(args.output_path, 'results.txt'), 'a') as f:
        f.write(
            f'Classification Error: {np.round(100 * (1.0 - accuracy), 1)} \n')
        f.write(f'Success Rate: {np.round(100 * sr, 1)} \n')

    print("Saving images to folder...")
    adversarial_images = np.stack(
        [adversarial.perturbed for adversarial in adversarials], axis=0)
    # Report the overall value range once instead of once per saved image.
    print(np.max(adversarial_images), np.min(adversarial_images))
    for i, image in enumerate(adversarial_images):
        Image.fromarray(_to_uint8_image(image)).save(
            os.path.join(args.output_path, f"images/{i}.png"))