def _test_mnist_targeted(self, classifier, x_test):
        x_test_original = x_test.copy()

        # Test BIM with np.inf norm
        attack = BasicIterativeMethod(classifier,
                                      eps=1.0,
                                      eps_step=0.01,
                                      targeted=True,
                                      batch_size=128,
                                      verbose=False)
        # y_test_adv = to_categorical((np.argmax(y_test, axis=1) + 1)  % 10, 10)
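        # Target the second-most-likely class predicted for each clean sample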
        pred_sort = classifier.predict(x_test).argsort(axis=1)
        y_test_adv = np.zeros((x_test.shape[0], 10))
        for i in range(x_test.shape[0]):
            y_test_adv[i, pred_sort[i, -2]] = 1.0
        x_test_adv = attack.generate(x_test, y=y_test_adv)

        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertEqual(y_test_adv.shape, test_y_pred.shape)
        # This doesn't work all the time, especially with small networks
        self.assertGreaterEqual((y_test_adv == test_y_pred).sum(),
                                x_test.shape[0] // 2)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
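A note on the target-construction loop above: ART ships a one-hot helper, so the per-sample loop can be written in vectorized form. A minimal sketch, assuming art.utils.to_categorical is importable (the commented-out line above suggests it already is):

    # Vectorized equivalent of the loop above: one-hot encode the
    # second-most-likely class for every sample at once.
    from art.utils import to_categorical
    y_test_adv = to_categorical(pred_sort[:, -2], nb_classes=10)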
Code example #2
    def test_9a_keras_iris_unbounded(self):
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
        attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.2, batch_size=128)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))
Code example #3
File: attack_svm.py Project: zobaed11/athena
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
    assert model is not None
    assert attack is not None

    art_classifier = SklearnClassifier(model=model,
                                       clip_values=(0, nb_classes))

    attacker = None
    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=0.2,
                                            eps_step=0.1,
                                            max_iter=3,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=128)
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier,
                            max_iter=5,
                            epsilon=1e-6,
                            nb_grads=3,
                            batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      targeted=False,
                                      batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        eps=0.3,
                                        eps_step=0.1,
                                        targeted=False,
                                        batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=0.3,
                                     gamma=0.5,
                                     batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=0.01)
    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples, it will take a while...'.format(
            attack))
    X_adv = attacker.generate(X, y=Y)

    del attacker
    return X_adv
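A hypothetical call, assuming a fitted scikit-learn SVC and that ATTACK.BIM is a member of the surrounding project's ATTACK enum (both names come from that project, not from ART):

    # Craft BIM adversarial examples against a fitted sklearn model.
    X_adv = get_adversarial_examples(X_test, Y_test, model=svc,
                                     nb_classes=10, attack=ATTACK.BIM)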
Code example #4
    def test_5_pytorch_iris(self):
        classifier = get_tabular_classifier_pt()

        # Test untargeted attack
        attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.1)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = BasicIterativeMethod(classifier, targeted=True, eps=1.0, eps_step=0.1, batch_size=128)
        x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Success rate of targeted BIM on Iris: %.2f%%", (acc * 100))
Code example #5
    def _test_backend_mnist(self, classifier, x_train, y_train, x_test,
                            y_test):
        x_test_original = x_test.copy()

        # Test BIM with np.inf norm
        attack = BasicIterativeMethod(classifier,
                                      eps=1,
                                      eps_step=0.1,
                                      batch_size=128)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info("Accuracy on adversarial train examples: %.2f%%",
                    (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info("Accuracy on adversarial test examples: %.2f%%",
                    (acc * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Code example #6
def _bim(model, data, labels, attack_args):
    """
    Basic Iteractive Method
    ADVERSARIAL EXAMPLES IN THE PHYSICAL WORLD
    Alexey Kurakin, Ian J. Goodfellow, Samy Bengio
    ``https://arxiv.org/pdf/1607.02533.pdf``
    :param model:
    :param data:
    :param labels:
    :param attack_args:
    :return:
    """
    eps = attack_args.get('eps', 0.3)
    eps_step = attack_args.get('eps_step', eps / 10.)
    max_iter = attack_args.get('max_iter', 100)

    targeted = attack_args.get('targeted', False)
    print('>>> Generating BIM examples.')
    attacker = BasicIterativeMethod(classifier=model,
                                    eps=eps,
                                    eps_step=eps_step,
                                    max_iter=max_iter,
                                    targeted=targeted)
    return attacker.generate(data, labels)
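A hypothetical call, assuming `model` is already wrapped in an ART classifier and `x_test`/`y_test` are NumPy arrays:

    # Generate BIM examples with a tighter step than the default eps / 10.
    x_adv = _bim(model, x_test, y_test,
                 {'eps': 0.3, 'eps_step': 0.03, 'max_iter': 100})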
Code example #7
    def test_8_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.estimators.classification.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            # Test untargeted attack
            attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.1, max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
            acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of %s on Iris with BIM adversarial examples: %.2f%%",
                classifier.__class__.__name__,
                acc * 100,
            )

            # Test targeted attack
            targets = random_targets(self.y_test_iris, nb_classes=3)
            attack = BasicIterativeMethod(classifier, targeted=True, eps=1.0, eps_step=0.1, batch_size=128, max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Success rate of %s on targeted BIM on Iris: %.2f%%",
                classifier.__class__.__name__,
                acc * 100,
            )

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))), 0.0, delta=0.00001)
Code example #8
    def _test_backend_mnist(self, classifier, x_train, y_train, x_test,
                            y_test):
        x_test_original = x_test.copy()

        # Test BIM with np.inf norm
        attack = BasicIterativeMethod(classifier,
                                      eps=1.0,
                                      eps_step=0.1,
                                      batch_size=128,
                                      verbose=False)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info("Accuracy on adversarial train examples: %.2f%%",
                    (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info("Accuracy on adversarial test examples: %.2f%%",
                    (acc * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)

        # Test eps of array type 1
        eps = np.ones(shape=x_test.shape) * 1.0
        eps_step = np.ones_like(eps) * 0.1

        attack_params = {"eps_step": eps_step, "eps": eps}
        attack.set_params(**attack_params)

        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())

        # Test eps of array type 2
        eps = np.ones(shape=x_test.shape[1:]) * 1.0
        eps_step = np.ones_like(eps) * 0.1

        attack_params = {"eps_step": eps_step, "eps": eps}
        attack.set_params(**attack_params)

        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())

        # Test eps of array type 3
        eps = np.ones(shape=x_test.shape[2:]) * 1.0
        eps_step = np.ones_like(eps) * 0.1

        attack_params = {"eps_step": eps_step, "eps": eps}
        attack.set_params(**attack_params)

        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())

        # Test eps of array type 4
        eps = np.ones(shape=x_test.shape[3:]) * 1.0
        eps_step = np.ones_like(eps) * 0.1

        attack_params = {"eps_step": eps_step, "eps": eps}
        attack.set_params(**attack_params)

        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())
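The four "eps of array type" blocks above exercise ART's support for array-valued eps/eps_step, which must be broadcastable against the input batch. A sketch of the shapes involved, assuming MNIST-like inputs of shape (N, 28, 28, 1):

    # type 1: eps.shape == x_test.shape      -> per-sample, per-pixel budget
    # type 2: eps.shape == x_test.shape[1:]  -> one budget map shared by the batch
    # type 3: eps.shape == x_test.shape[2:]  -> broadcast over the row axis
    # type 4: eps.shape == x_test.shape[3:]  -> per-channel, effectively scalar here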
Code example #9
File: craft_w_art.py Project: softsys4ai/athena
def craft(X, Y, art_classifier, attack=None, **attack_params):
    assert art_classifier is not None
    assert attack is not None

    attacker = None
    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=eps,
                                            eps_step=eps_step,
                                            max_iter=max_iter,
                                            targeted=targeted,
                                            num_random_init=0,
                                            batch_size=batch_size)

    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)

        attacker = DeepFool(classifier=art_classifier,
                            max_iter=max_iter,
                            epsilon=eps,
                            nb_grads=nb_grads,
                            batch_size=batch_size)

    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=eps,
                                      targeted=targeted,
                                      batch_size=batch_size)

    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        norm=norm,
                                        eps=eps,
                                        eps_step=eps_step,
                                        targeted=targeted,
                                        batch_size=batch_size)

    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)

        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=theta,
                                     gamma=gamma,
                                     batch_size=batch_size)

    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)

        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=lr,
                                   binary_search_steps=bsearch_steps)

    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)

        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=lr)

    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples, it will take a while...'.format(
            attack))
    X_adv = attacker.generate(X, y=Y)

    del attacker
    return X_adv
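A hypothetical call, assuming `clf` is an ART classifier and ATTACK.PGD is a member of the project's ATTACK enum:

    # Craft PGD examples, overriding two of the defaults read via attack_params.
    X_adv = craft(X_test, Y_test, clf, attack=ATTACK.PGD,
                  eps=0.1, max_iter=10)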
Code example #10
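This fragment assumes new_model, loss_object, and train_step are defined earlier in the file. A minimal sketch of a train_step compatible with TensorFlowV2Classifier, assuming a tf.keras model and optimizer (the names here are illustrative, not from the original):

    import tensorflow as tf

    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

    @tf.function
    def train_step(model, images, labels):
        # One gradient-descent step, in the form TensorFlowV2Classifier expects.
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))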
    print("Creating classifier...\n")

    adv_classifier = TensorFlowV2Classifier(
        model=new_model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=5,
        input_shape=(1, 25),
        clip_values=(0, 1),
    )

    print("Creating adversarial attack object...\n")
    bim = BasicIterativeMethod(adv_classifier,
                               eps=eps,
                               eps_step=eps_step,
                               targeted=False,
                               batch_size=2048)

    print("Generating adversarial samples...\n")
    logger.info("Craft attack on training examples")
    x_train_adv = bim.generate(train_data)
    save_samples(x_train_adv, 'bim_train', exp)
    logger.info("=" * 50)

    logger.info("Craft attack test examples")
    x_test_adv = bim.generate(test_data)
    save_samples(x_test_adv, 'bim_test', exp)
    logger.info("=" * 50)

    print("Evaluating adversarial samples on clean model...\n")