Example #1
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            # SVC: ScikitlearnSVC,
            # LinearSVC: ScikitlearnSVC,
        }

        (_, _), (x_test, y_test) = self.iris

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

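            # theta=1 drives each selected feature straight to the upper clip bound (1.0)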
            attack = SaliencyMapMethod(classifier, theta=1, batch_size=128)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
            acc = np.sum(
                preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
            logger.info(
                'Accuracy of ' + classifier.__class__.__name__ +
                ' on Iris with JSMA adversarial examples: '
                '%.2f%%', (acc * 100))
Example #2
    def __init__(self, model, theta=0.1, gamma=1.0, batch_size=16):
        super().__init__(model=model)
        self._theta = theta
        self._gamma = gamma
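        # theta: perturbation applied to each selected feature per step;
        # gamma: maximum fraction of features JSMA may modify per sample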
        self._method = SaliencyMapMethod(classifier=self.model,
                                         theta=self._theta,
                                         gamma=self._gamma,
                                         batch_size=batch_size)
Example #3
def atk_JSMA(x_train, x_test, y_train, y_test, classifier):
    # Create the JSMA attack crafter
    adv_crafter = SaliencyMapMethod(classifier, theta=1)
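    # Craft adversarial counterparts of both splits with the same attack instance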
    x_train_adv = adv_crafter.generate(x_train)
    x_test_adv = adv_crafter.generate(x_test)
    
    print("After JSMA Attack  \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv
Example #4
    def test_keras_iris_vector_unbounded(self):
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = SaliencyMapMethod(classifier, theta=1)
        x_test_iris_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
Example #5
    def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        attack = SaliencyMapMethod(classifier, theta=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%',
                    (acc * 100))
Example #6
    def test_pytorch_iris_vector(self):
        classifier = get_tabular_classifier_pt()

        attack = SaliencyMapMethod(classifier, theta=1)
        x_test_iris_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
        self.assertTrue((x_test_iris_adv <= 1).all())
        self.assertTrue((x_test_iris_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        accuracy = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%",
                    (accuracy * 100))
Example #7
    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = SaliencyMapMethod(classifier, theta=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%',
                    (acc * 100))
Example #8
    def _test_mnist_untargeted(self, classifier):
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

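        # Despite the name, `df` holds the JSMA attack object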
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_adv = df.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial examples: %.2f%%', (acc * 100))
Example #9
def general_test(model,
                 optimizer,
                 input_shape,
                 nb_classes,
                 test_loader,
                 method,
                 btrain=False,
                 model_file='last_model_92_sgd.pkl'):
    global _classes
    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
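    # Wrap the PyTorch model in ART's classifier API so the attacks below can operate on it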
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=20)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=20)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=20)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=20)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    correct, total = 0, 0
    class_correct = list(0. for _ in range(10))
    class_total = list(0. for _ in range(10))

    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())

        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        c = (predicted == labels.data).squeeze()
        for i in range(labels.size(0)):  # use the actual batch size, not a hard-coded 20
            label = labels.data[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        print('Accuracy of %5s : %2d %%' %
              (_classes[i], 100 * class_correct[i] / class_total[i]))
    return float(correct) / total
Example #10
def general_test_v2(model,
                    optimizer,
                    input_shape,
                    nb_classes,
                    test_loader,
                    method,
                    conf,
                    btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    correct, total = 0, 0

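    # adv_generalization (defined elsewhere) presumably crafts an adversarial dataset from the loader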
    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset,
                             batch_size=32,
                             shuffle=False,
                             drop_last=True)
    # temp_loader = test_loader

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return float(correct) / total
Example #11
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()

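    # "SMM" maps to the Saliency Map Method (JSMA) with a large per-step perturbation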
    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Unrecognized attacker name: {}".format(attacker_name))
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
Example #12
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=20)  #, max_iter=20
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier,
                                   confidence=0.5,
                                   learning_rate=0.001,
                                   max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier,
                                     confidence=0.5,
                                     learning_rate=0.001,
                                     max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Unrecognized attacker name: {}".format(attacker_name))
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
Example #13
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

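        # Pristine copy used below to verify the attack leaves its input unmodified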
        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            attack = SaliencyMapMethod(classifier, theta=1, batch_size=128)
            x_test_iris_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
            self.assertTrue((x_test_iris_adv <= 1).all())
            self.assertTrue((x_test_iris_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris,
                                        axis=1) == preds_adv).all())
            accuracy = np.sum(preds_adv == np.argmax(
                self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ +
                " on Iris with JSMA adversarial examples: "
                "%.2f%%",
                (accuracy * 100),
            )

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - self.x_test_iris))),
                                   0.0,
                                   delta=0.00001)
Example #14
    def test_keras_mnist(self):

        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        # Keras classifier
        classifier = get_classifier_kr()

        scores = classifier._model.evaluate(x_train, y_train)
        logger.info('[Keras, MNIST] Accuracy on training set: %.2f%%',
                    (scores[1] * 100))

        scores = classifier._model.evaluate(x_test, y_test)
        logger.info('[Keras, MNIST] Accuracy on test set: %.2f%%',
                    (scores[1] * 100))

        # targeted

        # Generate random target classes
        nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
        targets = np.random.randint(nb_classes, size=NB_TEST)
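        # Resample until no random target matches the true label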
        while (targets == np.argmax(y_test, axis=1)).any():
            targets = np.random.randint(nb_classes, size=NB_TEST)

        # Perform attack
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_adv = df.generate(x_test, y=to_categorical(targets, nb_classes))

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(y_test,
                                                   axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial examples: %.2f%%',
                    (accuracy * 100))

        # untargeted
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_adv = df.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(y_test,
                                                   axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial examples: %.2f%%',
                    (accuracy * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Example #15
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression

        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            # SVC: ScikitlearnSVC,
            # LinearSVC: ScikitlearnSVC,
        }

        (_, _), (x_test, y_test) = self.iris
        x_test_original = x_test.copy()

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

            attack = SaliencyMapMethod(classifier, theta=1, batch_size=128)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
            accuracy = np.sum(
                preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
            logger.info(
                'Accuracy of ' + classifier.__class__.__name__ +
                ' on Iris with JSMA adversarial examples: '
                '%.2f%%', (accuracy * 100))

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - x_test))),
                                   0.0,
                                   delta=0.00001)
Example #16
    def test_classifier_type_check_fail_classifier(self):
        # Use a useless test classifier to test basic classifier properties
        class ClassifierNoAPI:
            pass

        classifier = ClassifierNoAPI
        with self.assertRaises(TypeError) as context:
            _ = SaliencyMapMethod(classifier=classifier)

        self.assertIn(
            'For `SaliencyMapMethod` classifier must be an instance of '
            '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
            '(<class \'object\'>,).', str(context.exception))
Example #17
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()

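    # theta=.5: half-range step per modified feature; gamma=1.: every feature may be altered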
    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=.5, gamma=1.)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=1,
                                            eps=1,
                                            eps_step=0.5,
                                            max_iter=100,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=1)
    else:
        raise ValueError("Unrecognized attacker name: {}".format(attacker_name))
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
Example #18
class SaliencyMapAttack(AdversarialAttack):
    def __init__(self, model, theta=0.1, gamma=1.0, batch_size=16):
        super().__init__(model=model)
        self._theta = theta
        self._gamma = gamma
        self._method = SaliencyMapMethod(classifier=self.model,
                                         theta=self._theta,
                                         gamma=self._gamma,
                                         batch_size=batch_size)

    def attack_method(self, x, y=None):
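        # Optional one-hot targets y switch JSMA from untargeted to targeted mode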
        params = {}
        if y is not None:
            params['y'] = y
        return self._method.generate(x=x, **params)
Example #19
    def test_classifier_type_check_fail_gradients(self):
        # Use a test classifier not providing gradients required by white-box attack
        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
        from sklearn.tree import DecisionTreeClassifier

        classifier = ScikitlearnDecisionTreeClassifier(
            model=DecisionTreeClassifier())
        with self.assertRaises(TypeError) as context:
            _ = SaliencyMapMethod(classifier=classifier)

        self.assertIn(
            'For `SaliencyMapMethod` classifier must be an instance of '
            '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
            '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).',
            str(context.exception))
Example #20
    def _test_mnist_targeted(self, classifier):
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Generate random target classes
        nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
        targets = np.random.randint(nb_classes, size=NB_TEST)
        while (targets == np.argmax(y_test, axis=1)).any():
            targets = np.random.randint(nb_classes, size=NB_TEST)

        # Perform attack
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_adv = df.generate(x_test, y=to_categorical(targets, nb_classes))

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial examples: %.2f%%', (acc * 100))
Example #21
File: utils.py  Project: voidstrike/AIAML
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    if method == 'deepfool':
        adv_crafter = DeepFool(wmodel)
    elif method == 'bim':
        adv_crafter = BasicIterativeMethod(wmodel, batch_size=batch_size)
    elif method == 'jsma':
        adv_crafter = SaliencyMapMethod(wmodel, batch_size=batch_size)
    elif method == 'cw2':
        adv_crafter = CarliniL2Method(wmodel, batch_size=batch_size)
    elif method == 'cwi':
        adv_crafter = CarliniLInfMethod(wmodel, batch_size=batch_size)
    elif method == 'fgsm':
        adv_crafter = FastGradientMethod(wmodel, batch_size=batch_size)
    elif method == 'pgd':
        adv_crafter = ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    return adv_crafter
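A minimal usage sketch for the helper above, assuming a trained PyTorch model, its optimizer, and a NumPy batch x_batch (all hypothetical; the MNIST-style shape is illustrative):

    import torch.nn as nn

    crafter = build_adversarial(model, optimizer, nn.CrossEntropyLoss(),
                                input_shape=(1, 28, 28), nb_class=10,
                                method='jsma')
    x_adv = crafter.generate(x_batch)  # x_batch: float32 array in [0, 1]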
Example #22
    if args.attack == 'fgsm':
        attack = FastGradientMethod(classifier=classifier,
                                    eps=0.6,
                                    eps_step=0.6,
                                    batch_size=64)

    if args.attack == 'bim':
        if args.d == 'imagenet':
            attack = BasicIterativeMethod(classifier=classifier,
                                          eps=0.6,
                                          batch_size=64,
                                          max_iter=25)
        else:
            attack = BasicIterativeMethod(classifier=classifier,
                                          eps=0.6,
                                          batch_size=64)

    if args.attack == 'jsma':
        attack = SaliencyMapMethod(classifier=classifier, batch_size=64)

    if args.attack == 'c+w':
        attack = CarliniL2Method(classifier=classifier, batch_size=64)

    # generating adversarial of the testing dataset and save it to the folder './adv'
    if args.d == 'mnist' or args.d == 'cifar':
        x_adv = attack.generate(x=x_test)
        np.save('./adv/{}_{}.npy'.format(args.d, args.attack), x_adv)
    if args.d == 'imagenet':
        x_adv = attack.generate(x=x_test)
        np.save('./adv/{}_{}_{}.npy'.format(args.d, args.attack, args.model),
                x_adv)

    # accuracy of our test data
    pred = np.argmax(classifier.predict(x_test), axis=1)
Example #23
def JSMA(victims):
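    # `model` is assumed to be a module-level ART classifier wrapping the target network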
    adv_crafter = SaliencyMapMethod(model)
    finalVictims = adv_crafter.generate(x=victims)
    return finalVictims
Example #24
def saliency_map(classifier, inputs, true_targets, epsilon):
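    # Note: true_targets is unused here; generate() runs the untargeted variant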
    adv_crafter = SaliencyMapMethod(classifier, theta=epsilon, gamma=0.11)
    x_test_adv = adv_crafter.generate(x=inputs)
    return x_test_adv
Example #25
            x_test, y_test = pickle.load(
                open(
                    './dataset_imagenet/%s_%s_val_%i.p' %
                    (args.d, args.model, int(i)), 'rb'))

            if args.attack == 'fgsm':
                attack = FastGradientMethod(classifier=classifier,
                                            eps=0.6,
                                            eps_step=0.6)
            if args.attack == 'bim':
                attack = BasicIterativeMethod(classifier=classifier,
                                              eps=0.6,
                                              max_iter=5)
            if args.attack == 'jsma':
                # attack = ProjectedGradientDescent(classifier=classifier, eps=0.6, max_iter=5)
                attack = SaliencyMapMethod(classifier=classifier)
            if args.attack == 'c+w':
                attack = CarliniL2Method(classifier=classifier)
                # attack = CarliniLInfMethod(classifier=classifier, batch_size=1, max_iter=2)
                # attack = FastGradientMethod(classifier=classifier)
            from datetime import datetime
            now = datetime.now()
            current_time_after = now.strftime("%H:%M:%S")
            print("Current Time After=", current_time_after)

            print('Generating adversarial examples----------------')
            print(i, x_test.shape, y_test.shape)
            x_adv = attack.generate(x=x_test)
            print('Saving adversarial examples----------------')
            print(x_adv.shape)
            pickle.dump(x_adv,
Example #26
    def test_pytorch_mnist(self):
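        # swapaxes converts the NHWC MNIST arrays to the NCHW layout PyTorch expects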
        x_train_mnist = np.swapaxes(self.x_train_mnist, 1,
                                    3).astype(np.float32)
        x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
        x_test_original = x_test_mnist.copy()

        # Create basic PyTorch model
        classifier = get_image_classifier_pt()

        scores = get_labels_np_array(classifier.predict(x_train_mnist))
        accuracy = np.sum(
            np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist,
                                                   axis=1)) / self.n_train
        logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%",
                    (accuracy * 100))

        scores = get_labels_np_array(classifier.predict(x_test_mnist))
        accuracy = np.sum(
            np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("\n[PyTorch, MNIST] Accuracy on test set: %.2f%%",
                    (accuracy * 100))

        # targeted
        # Generate random target classes
        nb_classes = np.unique(np.argmax(self.y_test_mnist, axis=1)).shape[0]
        targets = np.random.randint(nb_classes, size=self.n_test)
        while (targets == np.argmax(self.y_test_mnist, axis=1)).any():
            targets = np.random.randint(nb_classes, size=self.n_test)

        # Perform attack
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_mnist_adv = df.generate(x_test_mnist,
                                       y=to_categorical(targets, nb_classes))

        self.assertFalse((x_test_mnist == x_test_mnist_adv).all())
        self.assertFalse((0.0 == x_test_mnist_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_mnist_adv))
        self.assertFalse((self.y_test_mnist == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("Accuracy on adversarial examples: %.2f%%",
                    (accuracy * 100))

        # untargeted
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_mnist_adv = df.generate(x_test_mnist)

        self.assertFalse((x_test_mnist == x_test_mnist_adv).all())
        self.assertFalse((0.0 == x_test_mnist_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_mnist_adv))
        self.assertFalse((self.y_test_mnist == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("Accuracy on adversarial examples: %.2f%%",
                    (accuracy * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(
            np.max(np.abs(x_test_original - x_test_mnist))),
                               0.0,
                               delta=0.00001)