def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Test PGD with np.inf norm
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)
Example #2
    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))
Example #3
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier,
                                   confidence=0.5,
                                   learning_rate=0.001,
                                   max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier,
                                     confidence=0.5,
                                     learning_rate=0.001,
                                     max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Unknown attacker name '{}'; please choose one of the supported attacks.".format(attacker_name))
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
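
# A minimal usage sketch (not part of the original snippet): `classifier` and
# `x_test` are assumed to be an already-fitted ART classifier and a test array;
# the attack names below are only illustrative.
for name in ("FGSM", "PGD", "DeepFool"):
    x_adv, elapsed = GetAttackers(classifier, x_test, name)
    preds = np.argmax(classifier.predict(x_adv), axis=1)
    print("%s: crafted %d adversarial samples in %.1fs" % (name, len(x_adv), elapsed))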
Example #4
    def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = ProjectedGradientDescent(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted PGD on Iris: %.2f%%',
                    (acc * 100))
def save_adversarial_examples_batch(clean_dir, params=[0.01, 0.002, 20]):
    """
    Scripts for pre-computing and saving adversarial examples with IBM-ART
    :param clean_dir: root directory of the legitimate clean data
    :param params: parameters used for customizing attacks
    :return: None
    """
    batch_size = 16
    adv_dir = os.path.dirname(clean_dir) + "_adv_resnet152_pgd-{0}-{1}-{2}".format(params[0], params[1], params[2])

    # Load pretrained model
    model = models.resnet152(pretrained=True).cuda().eval()
    # Build crafter for adversarial examples
    mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
    classifier = PyTorchClassifier((0.0, 1.0), model=model, preprocessing=(mean, std),
                                   loss=torch.nn.modules.loss.CrossEntropyLoss(),
                                   optimizer=None,  # not needed here: the pretrained network is only used for inference
                                   input_shape=(3, 224, 224), nb_classes=1000)
    adv_crafter = ProjectedGradientDescent(classifier,
                                           eps=params[0], eps_step=params[1], max_iter=params[2])

    image_paths = sorted(glob(clean_dir + '/**/*.JPEG', recursive=True))
    count = 0
    for start in range(0, len(image_paths), batch_size):
        end = min(start + batch_size, len(image_paths))
        image_batch_paths = image_paths[start:end]
        images = load_image_batch(image_batch_paths)
        adv_images = adv_crafter.generate(x=images)

        assert len(adv_images) == len(images)

        print("start: %d, end: %d" % (start, end))
        for (image_path, adv_image) in zip(image_batch_paths, adv_images):
            output_path = os.path.join(adv_dir, image_path[len(clean_dir):])
            save_array_to_image(adv_image, output_path)

            count += 1
            print(count)
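
# Hedged usage sketch (not in the original): the directory below is hypothetical;
# `params` follows the [eps, eps_step, max_iter] convention documented above.
save_adversarial_examples_batch("/data/imagenet/val", params=[0.01, 0.002, 20])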
Example #6
    print('\nMetrics: %.2f, %.2f, %.3f, %.3f' % tuple(measure[0:4]))
    if Defense:
        defenses(model, X, FEA_X, FS=True, SS=True)
    if Transfer:
        transfer(X, FEA_X)
    if Show:
        FEA_X = (FEA_X + 1) / 2
        grid_visual(
            np.swapaxes(
                np.reshape(
                    FEA_X,
                    (10, FEA_X.shape[0] // 10, img_rows, img_cols, nchannels)),
                0, 1))

if 'PGD_art' in attacks:
    pgd = PGD_art(art)
    if NORM == 2:
        pgd_params = {
            'eps': 10.,
            'eps_step': EPS,
            'max_iter': MAX_ITER,
            'norm': 2,
            # 'batch_size': 1,
        }
    elif NORM == np.inf:
        pgd_params = {
            'eps': 10.,
            'eps_step': EPS,
            'max_iter': MAX_ITER,
            'norm': np.inf,
            # 'batch_size': 1,
        }

mnist_classifier = PyTorchClassifier(clip_values=(0, 1), model=model, loss=criterion, optimizer=optimizer,
                                     input_shape=(1, 28, 28), nb_classes=10)

# Train the classifier
# mnist_classifier.fit(x_train, y_train, batch_size=64, nb_epochs=50)
# torch.save(model.state_dict(), "./minst.pt")
model.load_state_dict(torch.load("../checkpoints/model-nn-epoch100.pt"))


# Test the classifier
predictions = mnist_classifier.predict(test_dataset_array)
# print(predictions)

accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy before attack: {}%'.format(accuracy * 100))

# Craft the adversarial examples
epsilon = 0.2  # Maximum perturbation
# adv_crafter = AdversarialPatch(mnist_classifier, batch_size=16, max_iter=10)
# adv_crafter = FastGradientMethod(mnist_classifier, eps=epsilon)
# adv_crafter = CarliniL2Method(mnist_classifier)
adv_crafter = ProjectedGradientDescent(mnist_classifier)
# adv_crafter = DeepFool(mnist_classifier, epsilon=epsilon, max_iter=10)

x_test_adv = adv_crafter.generate(x=x_test)

# Test the classifier on adversarial examples
print(x_test_adv)
predictions = mnist_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy after attack: {}%'.format(accuracy * 100))
    print('Accuracy before attack: {}%'.format(accuracy * 100))

    # Craft the adversarial examples

    # PGD-40
    # adv_crafter_pgd_40 = ProjectedGradientDescent(mnist_classifier, eps=epsilon, max_iter=40, batch_size=batch_size)
    #
    # x_test_adv = adv_crafter_pgd_40.generate(x=test_dataset_array)

    # Test the classifier on adversarial examples
    # predictions = mnist_classifier.predict(x_test_adv)
    # accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    # print('Accuracy after PGD-40 attack: {}%'.format(accuracy * 100))

    # PGD-100
    adv_crafter_pgd_100 = ProjectedGradientDescent(mnist_classifier, max_iter=100, batch_size=batch_size)

    x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array)

    # Test the classifier on adversarial examples
    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100))

    # FGSM
    adv_crafter_fgsm = FastGradientMethod(mnist_classifier, eps=epsilon, batch_size=batch_size)
    x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

    # Test the classifier on adversarial examples
    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    # Craft the adversarial examples

    # # PGD-20
    # adv_crafter_pgd_20 = ProjectedGradientDescent(cifar_classifier, eps=epsilon, eps_step=0.01, max_iter=20, batch_size=batch_size)
    #
    # x_test_adv = adv_crafter_pgd_20.generate(x=test_dataset_array)
    #
    # # Test the classifier on adversarial examples
    # predictions = cifar_classifier.predict(x_test_adv)
    # accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    # print('Accuracy after PGD-20 attack: {}%'.format(accuracy * 100))

    # PGD-100
    adv_crafter_pgd_100 = ProjectedGradientDescent(cifar_classifier,
                                                   eps=epsilon,
                                                   eps_step=0.01,
                                                   max_iter=100,
                                                   batch_size=batch_size)

    x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array)

    # Test the classifier on adversarial examples
    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100))

    # FGSM
    adv_crafter_fgsm = FastGradientMethod(cifar_classifier,
                                          eps=epsilon,
Example #10
    begin_time = time.time()
    robust_classifier_model = load_model("./model/adv_model")
    robust_classifier = KerasClassifier((0., 1.), robust_classifier_model)
    # ================================== 2-1. Prepare the attack used for adversarial training =============================== #
    """
    The `ratio` determines how many of the clean samples in each batch are replaced with their adversarial counterpart.
    warning: Both successful and unsuccessful adversarial samples are used for training. In the case of unbounded attacks
            (e.g., DeepFool), this can result in invalid (very noisy) samples being included.
    """
    if adv_train_attack == "FGM":
        attacks = FastGradientMethod(robust_classifier, eps=attack_par["epsilon"], norm=attack_par["norm_type"])
    elif adv_train_attack == "BIM":
        attacks = BasicIterativeMethod(robust_classifier, norm=attack_par["norm_type"], eps=attack_par["epsilon"],
                                              eps_step=attack_par["epsilon_step"], max_iter=attack_par["max_iteration"])
    elif adv_train_attack == "PGD":
        attacks = ProjectedGradientDescent(robust_classifier, norm=attack_par["norm_type"], eps=attack_par["epsilon"],
                                          eps_step=attack_par["epsilon_step"], max_iter=attack_par["max_iteration"])
    elif adv_train_attack == "JSMA":
        attacks = SaliencyMapMethod(robust_classifier, theta=attack_par["theta"], gamma=attack_par["gamma"])
    elif adv_train_attack == "DeepFool":
        attacks = DeepFool(robust_classifier, max_iter=attack_par["max_iteration"], epsilon=attack_par["epsilon"])


    # ================================== 2-2. Run the adversarial training =============================== #
    trainer = AdversarialTrainer(robust_classifier, attacks, ratio=ratio_value)
    trainer.fit(x_train, y_train, nb_epochs=adv_train_num, batch_size=128, verbose=2)
    robust_classifier_model.save("./model/adv_model")
    end_time = time.time()
    model = load_model("./model/adv_model")
    scores = model.evaluate(x_test, y_test, verbose=0)
    # print('Test loss:', scores[0])
    # print('Test accuracy:', scores[1])
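
    # Hedged follow-up sketch (not in the original script): reuse `attacks` and
    # `robust_classifier` from above to estimate robust accuracy; `x_test` and
    # `y_test` are assumed to be the one-hot-encoded test arrays loaded earlier.
    x_test_adv = attacks.generate(x_test)
    preds_adv = np.argmax(robust_classifier.predict(x_test_adv), axis=1)
    robust_acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    print('Robust accuracy after adversarial training: {:.2f}%'.format(robust_acc * 100))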
Example #11
                                     optimizer=optimizer,
                                     input_shape=(1, 28, 28),
                                     nb_classes=10)

# Train the classifier
cifar_classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)

# Test the classifier
predictions = cifar_classifier.predict(x_test)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy before attack: {}%'.format(accuracy * 100))

# Craft the adversarial examples
epsilon = 0.2  # Maximum perturbation
adv_crafter = ProjectedGradientDescent(cifar_classifier, eps=epsilon)
x_test_adv = adv_crafter.generate(x=x_test)

# Test the classifier on adversarial examples
predictions = cifar_classifier.predict(x_test_adv)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy after attack: {}%'.format(accuracy * 100))

cifar_classifier.save('cifar_fgsm_state_dict', 'models')

preprocess = Autoencoder()
preprocess.load_state_dict(torch.load('models/conv_autoencoder.pth'))

x_test_adv = torch.from_numpy(x_test_adv)
x_test_denoised = preprocess(x_test_adv)
        model.load_state_dict(torch.load(file))

        # Test the classifier
        predictions = mnist_classifier.predict(test_dataset_array)

        accuracy = np.sum(
            np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
                test_label_dataset_array)
        print('Accuracy before attack: {}%'.format(accuracy * 100))

        # Craft the adversarial examples

        # PGD-40
        adv_crafter_pgd_40 = ProjectedGradientDescent(
            mnist_classifier,
            eps=args.epsilon,
            max_iter=40,
            batch_size=args.batch_size)

        x_test_adv = adv_crafter_pgd_40.generate(x=test_dataset_array)

        # Test the classifier on adversarial examples
        predictions = mnist_classifier.predict(x_test_adv)
        accuracy = np.sum(
            np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
                test_label_dataset_array)
        print('Accuracy after PGD-40 attack: {}%'.format(accuracy * 100))
        log_file.write("{} {}\n".format(e, accuracy))

    log_file.close()