Example #1
    def __init__(self,
                 model,
                 targeted=False,
                 step_size_iter=.1,
                 max_perturbation=.3,
                 norm_order=np.inf,
                 max_iterations=100,
                 num_random_init=0,
                 batch_size=16):
        super().__init__(model=model)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._norm_order = norm_order
        self._max_iterations = max_iterations
        self._num_random_init = num_random_init
        self._method = ProjectedGradientDescent(
            classifier=self.model,
            targeted=self._targeted,
            norm=self._norm_order,
            eps=self._max_perturbation,
            eps_step=self._step_size_iter,
            max_iter=self._max_iterations,
            num_random_init=self._num_random_init,
            batch_size=batch_size)
    def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128):
        self.wrapped_pytorch_model = wrapModel(model, loss_criterion)
        self.norm = norm
        self.batch_size = batch_size
        self.attack = ProjectedGradientDescent(self.wrapped_pytorch_model,
                                               norm=norm,
                                               random_init=False,
                                               batch_size=batch_size)

        # Use GPU for computation if it is available
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
def pgd_linf(model, X, y, optimizer, epsilon=0.1):
    """ Construct pgd adversarial examples on the examples X"""
    classifier = PyTorchClassifier(
        model=model_concetenate,
        loss=custom_loss,
        optimizer=optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
        device_type='gpu'
    )
    attack = ProjectedGradientDescent(classifier=classifier, eps=epsilon)

    X_adv = attack.generate(X.numpy(), y.numpy())
    return torch.Tensor(X_adv)
Example #4
class PGDAttack(AdversarialAttack):
    def __init__(self,
                 model,
                 targeted=False,
                 step_size_iter=.1,
                 max_perturbation=.3,
                 norm_order=np.inf,
                 max_iterations=100,
                 num_random_init=0,
                 batch_size=16):
        super().__init__(model=model)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._norm_order = norm_order
        self._max_iterations = max_iterations
        self._num_random_init = num_random_init
        self._method = ProjectedGradientDescent(
            classifier=self.model,
            targeted=self._targeted,
            norm=self._norm_order,
            eps=self._max_perturbation,
            eps_step=self._step_size_iter,
            max_iter=self._max_iterations,
            num_random_init=self._num_random_init,
            batch_size=batch_size)

    def attack_method(self, x, y=None):
        params = {}
        if y is not None:
            params['y'] = y
        return self._method.generate(x=x, **params)
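A minimal usage sketch for the PGDAttack wrapper above; `art_classifier`, `x_batch` and `one_hot_targets` are hypothetical placeholders that are not part of the original example:

# Untargeted PGD through the PGDAttack wrapper defined above.
pgd = PGDAttack(model=art_classifier,
                max_perturbation=0.3,
                step_size_iter=0.05,
                max_iterations=40)
x_adv = pgd.attack_method(x_batch)
# For a targeted attack, construct with targeted=True and pass one-hot targets:
# pgd_t = PGDAttack(model=art_classifier, targeted=True)
# x_adv_t = pgd_t.attack_method(x_batch, y=one_hot_targets)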
    def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Test PGD with np.inf norm
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)

        # Test PGD with 3 random initialisations
        attack = ProjectedGradientDescent(classifier, num_random_init=3)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info(
            'Accuracy on adversarial train examples with 3 random initialisations: %.2f%%',
            acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on adversarial test examples with 3 random initialisations: %.2f%%',
            acc * 100)
    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))
    def evaluate_pgd(self, data_loader, num_iter=40):
        """Adversarial evaluation by PGD"""
        norm, eps = np.inf, attack_configs['PGD'][self.dataset]['epsilon']
        eps_step = 2 * eps / num_iter
        adv_crafter = ProjectedGradientDescent(self.classifier,
                                               norm=norm,
                                               eps=eps,
                                               eps_step=eps_step,
                                               max_iter=num_iter,
                                               random_init=True)

        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            SVC: ScikitlearnSVC,
            LinearSVC: ScikitlearnSVC,
        }

        (_, _), (x_test, y_test) = self.iris
        x_test_original = x_test.copy()

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

            # Test untargeted attack
            attack = ProjectedGradientDescent(classifier,
                                              eps=1,
                                              eps_step=0.1,
                                              max_iter=5)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((np.array(x_test) == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(np.array(y_test),
                                        axis=1) == preds_adv).all())
            acc = np.sum(
                preds_adv == np.argmax(np.array(y_test), axis=1)) / len(y_test)
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ +
                " on Iris with PGD adversarial examples: "
                "%.2f%%",
                (acc * 100),
            )
    def test_pytorch_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_tabular_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((np.array(x_test) == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(np.array(y_test),
                                    axis=1) == preds_adv).all())
        acc = np.sum(
            preds_adv == np.argmax(np.array(y_test), axis=1)) / len(y_test)
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%",
                    (acc * 100))
    def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = ProjectedGradientDescent(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted PGD on Iris: %.2f%%',
                    (acc * 100))
Example #11
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            SVC: ScikitlearnSVC,
            LinearSVC: ScikitlearnSVC
        }

        (_, _), (x_test, y_test) = self.iris
        x_test_original = x_test.copy()

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

            # Test untargeted attack
            attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
            acc = np.sum(
                preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
            logger.info(
                'Accuracy of ' + classifier.__class__.__name__ +
                ' on Iris with PGD adversarial examples: '
                '%.2f%%', (acc * 100))

            # Test targeted attack
            targets = random_targets(y_test, nb_classes=3)
            attack = ProjectedGradientDescent(classifier,
                                              targeted=True,
                                              eps=1,
                                              eps_step=0.1)
            x_test_adv = attack.generate(x_test, **{'y': targets})
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(
                preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
            logger.info(
                'Success rate of ' + classifier.__class__.__name__ +
                ' on targeted PGD on Iris: %.2f%%', (acc * 100))

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - x_test))),
                                   0.0,
                                   delta=0.00001)
    def test_classifier_type_check_fail_classifier(self):
        # Use a useless test classifier to test basic classifier properties
        class ClassifierNoAPI:
            pass

        classifier = ClassifierNoAPI
        with self.assertRaises(TypeError) as context:
            _ = ProjectedGradientDescent(classifier=classifier)

        self.assertIn(
            'For `ProjectedGradientDescent` classifier must be an instance of '
            '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
            '(<class \'object\'>,).', str(context.exception))
    def test_keras_iris_unbounded(self):
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.2,
                                          max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%",
                    (acc * 100))
    def test_classifier_type_check_fail_gradients(self):
        # Use a test classifier not providing gradients required by white-box attack
        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
        from sklearn.tree import DecisionTreeClassifier

        classifier = ScikitlearnDecisionTreeClassifier(
            model=DecisionTreeClassifier())
        with self.assertRaises(TypeError) as context:
            _ = ProjectedGradientDescent(classifier=classifier)

        self.assertIn(
            'For `ProjectedGradientDescent` classifier must be an instance of '
            '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
            '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).',
            str(context.exception))
Example #15
def train_model(model,
                train_generator,
                val_generator,
                save_path,
                nb_epochs=20,
                adv_eps=0.0,
                adv_frac=0.5,
                **kwargs):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    best_acc = 0
    attacker = None
    if adv_eps > 0:
        classifier = cnn_model.SmoothedPytorchClassifier(
            model,
            nb_classes=40,
            loss=torch.nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(model.parameters(), lr=0.0001),
            input_shape=(cnn_model.WINDOW_LENGTH, ))
        attacker = ProjectedGradientDescent(
            classifier,
            eps=adv_eps,
            eps_step=adv_eps / 5,
            max_iter=10,
            batch_size=train_generator.batch_size)
        #attacker = FastGradientMethod(classifier,eps=adv_eps,batch_size=train_generator.batch_size)
    for i in range(nb_epochs):
        logger.info("Epoch %d" % i)
        fit(model,
            train_generator,
            optimizer,
            adversarial_attacker=attacker,
            adversarial_frac=adv_frac,
            **kwargs)
        loss, nat_acc, adv_acc = eval_benign(model,
                                             val_generator,
                                             adversarial_attacker=attacker,
                                             niters=1)
        logger.info("Validation loss : %f" % loss)
        logger.info("Validation accuracy : %f" % nat_acc)
        acc = nat_acc
        if adv_eps > 0:
            logger.info("Adversarial accuracy : %f" % adv_acc)
            acc = adv_acc
        if acc > best_acc:
            best_acc = acc
            logger.info("Saving model")
            torch.save(model.state_dict(), save_path)
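A hedged sketch of how train_model above might be invoked; `net`, `train_gen` and `val_gen` are placeholders that are not defined in the original example:

# Adversarial training with PGD applied to half of each batch (adv_frac=0.5).
train_model(net, train_gen, val_gen,
            save_path='best_model.pt',
            nb_epochs=20,
            adv_eps=0.1,
            adv_frac=0.5)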
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()

    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
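A brief usage sketch, assuming `classifier` is an ART classifier and `x_test` a NumPy test batch (both placeholders here):

# Craft L2 PGD adversarial samples and time the generation.
x_test_adv, elapsed = GetAttackers(classifier, x_test, "PGD")
print("PGD generation took {:.1f}s".format(elapsed))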
Example #17
def pgd_adv_train(model, data, outpath, model_name):
    attack = ProjectedGradientDescent(model,
                                      eps=0.015,
                                      eps_step=0.001,
                                      max_iter=2,
                                      targeted=False,
                                      num_random_init=0,
                                      )

    adv_trainer = AdversarialTrainer(model,
                                     attacks=attack,
                                     ratio=1.0)
    print('>>> Processing adversarial training, it will take a while...')
    x_train, y_train = data
    adv_trainer.fit(x_train, y_train, nb_epochs=30, batch_size=32)

    savefile = os.path.join(outpath, model_name)
    print('>>>Save the model to [{}]'.format(savefile))
    adv_trainer.classifier.save(savefile)

    return adv_trainer.classifier
Example #18
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    if method == 'deepfool':
        adv_crafter = DeepFool(wmodel)
    elif method == 'bim':
        adv_crafter = BasicIterativeMethod(wmodel, batch_size=batch_size)
    elif method == 'jsma':
        adv_crafter = SaliencyMapMethod(wmodel, batch_size=batch_size)
    elif method == 'cw2':
        adv_crafter = CarliniL2Method(wmodel, batch_size=batch_size)
    elif method == 'cwi':
        adv_crafter = CarliniLInfMethod(wmodel, batch_size=batch_size)
    elif method == 'fgsm':
        adv_crafter = FastGradientMethod(wmodel, batch_size=batch_size)
    elif method == 'pgd':
        adv_crafter = ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    return adv_crafter
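A usage sketch under the assumption that `net`, `criterion`, `opt` and `x_batch` (a torch image batch) are defined elsewhere; none of them appear in the original example:

# Build a PGD crafter for a CIFAR-sized model and attack one batch.
crafter = build_adversarial(net, opt, criterion,
                            input_shape=(3, 32, 32), nb_class=10,
                            method='pgd', batch_size=64, pgd_eps=8 / 255)
x_adv = crafter.generate(x=x_batch.cpu().numpy())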
class PGD:
    """
    Class for adversarial attacks based on projected gradient descent (PGD).
    The implementation of PGD in ART executes projection on a feasible region
    after each iteration. However, random restrating is not used in this
    implementation. Not using radom restarting is the difference between the
    PGD implemented in ART and the one described by Madry et al. 

    This adversarial attack subsumes the iterative FGSM. 
    """
    def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128):
        self.wrapped_pytorch_model = wrapModel(model, loss_criterion)
        self.norm = norm
        self.batch_size = batch_size
        self.attack = ProjectedGradientDescent(self.wrapped_pytorch_model,
                                               norm=norm,
                                               random_init=False,
                                               batch_size=batch_size)

        # Use GPU for computation if it is available
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

    def generatePerturbation(self, data, budget, max_iter=15):
        images, _ = data

        # eps_step is not allowed to be larger than budget according to the
        # documentation of ART.
        eps_step = budget / 5
        images_adv = self.attack.generate(x=images.cpu().numpy(),
                                          norm=self.norm,
                                          eps=budget,
                                          eps_step=eps_step,
                                          max_iter=max_iter,
                                          batch_size=self.batch_size)
        images_adv = torch.from_numpy(images_adv)

        # The output to be returned should be loaded on an appropriate device.
        return images_adv.to(self.device)
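A minimal sketch of using the PGD wrapper above, assuming a PyTorch model `net`, a loss `criterion` and a DataLoader `loader` exist (all placeholders):

# Generate an L-infinity perturbation with budget 8/255 for one batch.
pgd = PGD(net, criterion, norm=np.inf, batch_size=64)
images_adv = pgd.generatePerturbation(next(iter(loader)), budget=8 / 255, max_iter=20)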
Example #20
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()

    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=.5, gamma=1.)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=1,
                                            eps=1,
                                            eps_step=0.5,
                                            max_iter=100,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=1)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
    def test_pytorch_iris_pt(self):
        classifier = get_tabular_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%",
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = ProjectedGradientDescent(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(
            targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Success rate of targeted PGD on Iris: %.2f%%",
                    (acc * 100))
    def _test_backend_mnist(self, classifier, x_train, y_train, x_test,
                            y_test):
        x_test_original = x_test.copy()

        # Test PGD with np.inf norm
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / len(y_train)
        logger.info("Accuracy on adversarial train examples: %.2f%%",
                    acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(np.array(y_test),
                                                        axis=1)) / len(y_test)
        logger.info("Accuracy on adversarial test examples: %.2f%%", acc * 100)

        # Test PGD with 3 random initialisations
        attack = ProjectedGradientDescent(classifier,
                                          num_random_init=3,
                                          max_iter=5)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / len(y_train)
        logger.info(
            "Accuracy on adversarial train examples with 3 random initialisations: %.2f%%",
            acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(np.array(y_test),
                                                        axis=1)) / len(y_test)
        logger.info(
            "Accuracy on adversarial test examples with 3 random initialisations: %.2f%%",
            acc * 100)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(
            np.max(np.abs(np.array(x_test_original) - np.array(x_test)))),
                               0.0,
                               delta=0.00001)
def general_test_v2(model,
                    optimizer,
                    input_shape,
                    nb_classes,
                    test_loader,
                    method,
                    conf,
                    btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
        assert isinstance(model, AttackPGD), 'Incorrect Model Configuration'
    model = model.model.eval()
    # model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    elif method == 'PGD':
        adv_crafter = ProjectedGradientDescent(warped_model, batch_size=32)

    correct, total = 0, 0

    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset,
                             batch_size=32,
                             shuffle=False,
                             drop_last=True)
    # temp_loader = test_loader

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images, conf)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
Example #24
datagen.fit(x_train)
art_datagen = KerasDataGenerator(
    datagen.flow(x=x_train, y=y_train, batch_size=batch_size, shuffle=True),
    size=x_train.shape[0],
    batch_size=batch_size,
)

# Create a toy Keras CNN architecture & wrap it under ART interface
classifier = KerasClassifier(build_model(),
                             clip_values=(0, 1),
                             use_logits=False)

# Create the attack for the adversarial trainer; here, PGD crafts adversarial examples on the target model
pgd = ProjectedGradientDescent(classifier,
                               eps=8,
                               eps_step=2,
                               max_iter=10,
                               num_random_init=20)

# Create some adversarial samples for evaluation
x_test_pgd = pgd.generate(x_test)

# Create adversarial trainer and perform adversarial training
adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0)
adv_trainer.fit_generator(art_datagen, nb_epochs=83)

# Evaluate the adversarially trained model on clean test set
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print("Accuracy test set: %.2f%%" %
      (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))
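The PGD samples created above (`x_test_pgd`) can also be used to report robust accuracy after training; a short sketch reusing the variables already defined in this example:

# Evaluate the adversarially trained model on the PGD test set
labels_adv = np.argmax(classifier.predict(x_test_pgd), axis=1)
print("Accuracy on PGD adversarial test set: %.2f%%" %
      (np.sum(labels_adv == labels_true) / x_test.shape[0] * 100))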
    predict = classifier.predict(x_test_adv)
    predict_classes = np.argmax(predict, axis=-1)
    target_names = ["Class {}".format(i) for i in range(CLASSES)]
    print(
        classification_report(y_test,
                              predict_classes,
                              target_names=target_names))
    accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
    print('Accuracy on CarliniLInfMethod test examples: {:.3f}%'.format(
        accuracy * 100))
    carlini_example = x_test_adv[example]

    print("*" * 100)
    attack = ProjectedGradientDescent(classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      eps_step=0.1,
                                      max_iter=100)
    x_test_adv = attack.generate(x_test)
    perturbation = np.mean(np.abs((x_test_adv - x_test)))
    print('Average perturbation: {:.10f}'.format(perturbation))
    predict = classifier.predict(x_test_adv)
    predict_classes = np.argmax(predict, axis=-1)
    target_names = ["Class {}".format(i) for i in range(CLASSES)]
    print(
        classification_report(y_test,
                              predict_classes,
                              target_names=target_names))
    accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
    print(
        'Accuracy on ProjectedGradientDescent[norm=inf] test examples: {:.3f}%'
        .format(accuracy * 100))
Example #26
def test_robust(opt, model, classifier, attack_method, c, norm=None):
    if opt.attack == 'FGSM':
        adv_crafter = FastGradientMethod(classifier,
                                         norm=norm,
                                         eps=c,
                                         targeted=False,
                                         num_random_init=0,
                                         batch_size=opt.bs)
    if opt.attack == 'PGD':
        adv_crafter = ProjectedGradientDescent(classifier,
                                               norm=norm,
                                               eps=c,
                                               eps_step=c / 10.,
                                               max_iter=10,
                                               targeted=False,
                                               num_random_init=1,
                                               batch_size=opt.bs)
    if opt.attack == 'BIM':
        adv_crafter = ProjectedGradientDescent(classifier,
                                               norm=norm,
                                               eps=c,
                                               eps_step=c / 10.,
                                               max_iter=10,
                                               targeted=False,
                                               num_random_init=0,
                                               batch_size=opt.bs)
    if opt.attack == 'JSMA':
        adv_crafter = SaliencyMapMethod(classifier,
                                        theta=0.1,
                                        gamma=c,
                                        batch_size=opt.bs)
    if opt.attack == 'CW':
        adv_crafter = cw.L2Adversary(targeted=False,
                                     confidence=0.01,
                                     c_range=(c, 1e10),
                                     max_steps=1000,
                                     abort_early=False,
                                     search_steps=5,
                                     box=(0., 1.0),
                                     optimizer_lr=0.01)

    correct = 0
    total = 0
    total_sum = 0
    common_id = []
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        output = classifier.predict(inputs.cpu().numpy(), batch_size=opt.bs)
        output = torch.tensor(output)
        output = output.cuda()
        init_pred = output.max(1, keepdim=False)[1]
        common_id = np.where(
            init_pred.cpu().numpy() == targets.cpu().numpy())[0]

        if opt.attack == 'CW':
            x_test_adv = adv_crafter(model, inputs, targets, to_numpy=True)
        else:
            x_test_adv = adv_crafter.generate(x=inputs.cpu().numpy())

        perturbed_output = classifier.predict(x_test_adv)
        perturbed_output = torch.tensor(perturbed_output)
        perturbed_output = perturbed_output.cuda()
        final_pred = perturbed_output.max(1, keepdim=False)[1]
        total_sum += targets.size(0)
        total += len(common_id)
        correct += final_pred[common_id].eq(
            targets[common_id].data).cpu().sum()
        attack_acc = 100. * float(correct) / total

        progress.progress_bar(
            batch_idx, len(testloader),
            'Attack Strength:%.3f, robust accuracy: %.3f%% (%d/%d)'
            '' % (c, attack_acc, correct, total))
Example #27
def get_adversarial(targeted,
                    attack_name,
                    classifier,
                    xs,
                    target_ys,
                    batch_size,
                    dataset,
                    fgsm_epsilon=0,
                    cwl2_confidence=0):

    # The attack
    attack = ''
    samples_range = xs.shape[0]

    #======================================
    if attack_name == 'FastGradientMethod':
        # norm=np.inf, eps=.3, eps_step=0.1, targeted=False, num_random_init=0, batch_size=1,minimal=False
        attack = FastGradientMethod(classifier=classifier,
                                    targeted=targeted,
                                    eps=fgsm_epsilon,
                                    batch_size=batch_size)
    #=====================================
    elif attack_name == 'CarliniLInfMethod':
        # confidence=0.0, targeted=False, learning_rate=0.01, max_iter=10, max_halving=5,
        #max_doubling=5, eps=0.3, batch_size=128
        attack = CarliniLInfMethod(classifier=classifier,
                                   max_iter=1000,
                                   targeted=targeted,
                                   batch_size=batch_size)
    #-------------------------------
    elif attack_name == 'UniversalPerturbation':
        # attacker='deepfool', attacker_params=None, delta=0.2,
        # max_iter=20, eps=10.0, norm=np.inf

        if targeted:
            print('UniversalPerturbation attack cannot be targeted.')
            exit()
        attack = UniversalPerturbation(classifier=classifier, max_iter=5)

    #==============================================
    elif attack_name == 'ProjectedGradientDescent':
        # norm=np.inf, eps=.3, eps_step=0.1, max_iter=100,
        # targeted=False, num_random_init=0, batch_size=1
        if dataset == 'mnist':
            attack = ProjectedGradientDescent(classifier=classifier,
                                              targeted=targeted,
                                              norm=1,
                                              eps=.3,
                                              eps_step=0.01,
                                              num_random_init=0,
                                              max_iter=40,
                                              batch_size=batch_size)
        else:
            attack = ProjectedGradientDescent(classifier=classifier,
                                              targeted=targeted,
                                              norm=1,
                                              eps=8.0,
                                              eps_step=2.0,
                                              num_random_init=0,
                                              max_iter=7,
                                              batch_size=batch_size)

    if targeted:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :],
                              y=target_ys[0:batch_size])  ###################
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(
                xs[ii:ii + batch_size, :, :, :],
                y=target_ys[ii:ii + batch_size])  ####################
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii

        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(
                last_samples,
                y=target_ys[last_ii + batch_size:])  ################
            adv = np.concatenate((adv, adv_samples), axis=0)
    else:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :])  ###################
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(
                xs[ii:ii + batch_size, :, :, :])  ####################
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii

        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples)  ################
            adv = np.concatenate((adv, adv_samples), axis=0)

    adv = np.asarray(adv)
    return adv
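A hypothetical call to get_adversarial for an untargeted MNIST attack; `classifier` and `x_test` are placeholders that the original example receives from elsewhere:

# Untargeted L1-norm PGD over the whole test set, generated in batches of 128.
x_adv = get_adversarial(targeted=False,
                        attack_name='ProjectedGradientDescent',
                        classifier=classifier,
                        xs=x_test,
                        target_ys=None,
                        batch_size=128,
                        dataset='mnist')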
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            # Test untargeted attack
            attack = ProjectedGradientDescent(classifier,
                                              eps=1,
                                              eps_step=0.1,
                                              max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris,
                                        axis=1) == preds_adv).all())
            acc = np.sum(preds_adv == np.argmax(
                self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ +
                " on Iris with PGD adversarial examples: "
                "%.2f%%",
                (acc * 100),
            )

            # Test targeted attack
            targets = random_targets(self.y_test_iris, nb_classes=3)
            attack = ProjectedGradientDescent(classifier,
                                              targeted=True,
                                              eps=1,
                                              eps_step=0.1,
                                              max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(preds_adv == np.argmax(
                targets, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Success rate of " + classifier.__class__.__name__ +
                " on targeted PGD on Iris: %.2f%%", (acc * 100))

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - self.x_test_iris))),
                                   0.0,
                                   delta=0.00001)
Example #29
    # # training for MNIST
    # classifier = KerasClassifier(clip_values=(-0.5, 0.5), model=model, use_logits=False)
    # attack = ProjectedGradientDescent(classifier, eps=0.3, eps_step=0.01, max_iter=20, batch_size=128)

    # ## training for CIFAR
    # classifier = KerasClassifier(model=model, use_logits=False)
    # attack = ProjectedGradientDescent(classifier, eps=8/255, eps_step=2/255, max_iter=10, batch_size=512)

    ## training for SVHN
    classifier = KerasClassifier(clip_values=(-0.5, 0.5),
                                 model=model,
                                 use_logits=False)
    attack = ProjectedGradientDescent(classifier,
                                      eps=8 / 255,
                                      eps_step=1 / 255,
                                      max_iter=20,
                                      batch_size=512)

    x_test_pgd = attack.generate(x_test, y_test)
    # np.save('./data/' + dataset + '_data/model/' + model_name + '_y_' + attack_name + '.npy', x_test_pgd)

    # Evaluate the benign trained model on adv test set
    labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
    print('Accuracy on original PGD adversarial samples: %.2f%%' %
          (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

    trainer = AdversarialTrainer(classifier, attack, ratio=1.0)
    trainer.fit(x_train, y_train, nb_epochs=60, batch_size=1024)

    classifier.save(filename='adv_' + model_name + '.h5')
Example #30
predictions = classifier2.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model2 on shared test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model2 on shared test examples: {}%'.format(
    top_five_acc * 100))

# Define attack based on model1

if attack_choice == "fgsm":
    attack = FastGradientMethod(classifier=classifier1, eps=eps)
else:
    attack = ProjectedGradientDescent(classifier=classifier1,
                                      eps=eps,
                                      max_iter=adv_steps)

print()

print("generating adversarial examples...")

# generate adv examples for model1 based on shared data
x_test_adv = attack.generate(x=shared_x_test)

# test adv examples generated from model1 on model1
predictions = classifier1.predict(x_test_adv)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model1 on adversarial test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)