Exemple #1
0
    def test_class_gradient_target(self):
        """
        Check the class gradients of the PyTorch classifier for a single integer label.

        The gradients of `self.x_test` are requested for label 3, and two slices of the first
        sample's gradient are compared against stored reference values (4 decimals).
        """
        grads = get_classifier_pt().class_gradient(self.x_test, label=3)

        # A single label was requested, hence the second axis has length one
        self.assertEqual(grads.shape, (NB_TEST, 1, 1, 28, 28))

        # Reference for the slice [0, 0, 0, :, 14]
        reference_fixed_last_axis = np.asarray([
            -0.00195835, -0.00134457, -0.00307221, -0.00340564, 0.00175022,
            -0.00239714, -0.00122619, 0.0, 0.0, -0.00520899, -0.00046105,
            0.00414874, -0.00171095, 0.00429184, 0.0075138, 0.00792443,
            0.0019566, 0.00035517, 0.00504575, -0.00037397, 0.00022343,
            -0.00530035, 0.0020528, 0.0, 0.0, 0.0, 0.0, 0.0
        ])
        np.testing.assert_array_almost_equal(
            grads[0, 0, 0, :, 14], reference_fixed_last_axis, decimal=4)

        # Reference for the slice [0, 0, 0, 14, :]
        reference_fixed_fourth_axis = np.asarray([
            5.0867130e-03, 4.8564533e-03, 6.1040395e-03, 8.6531248e-03,
            -6.0958802e-03, -1.4114541e-02, -7.1085966e-04, -5.0330797e-04,
            1.2943064e-02, 8.2416134e-03, -1.9859453e-04, -9.8110031e-05,
            -3.8902226e-03, -1.2945874e-03, 7.5138002e-03, 1.7720887e-03,
            3.1399354e-04, 2.3657191e-04, -3.0891625e-03, -1.0211228e-03,
            2.0828887e-03, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
            0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00
        ])
        np.testing.assert_array_almost_equal(
            grads[0, 0, 0, 14, :], reference_fixed_fourth_axis, decimal=4)
Exemple #2
0
    @classmethod
    def setUpClass(cls):
        """
        Load MNIST once for the whole test case and build the detector-classifier under test.

        Stores the first `NB_TRAIN` / `NB_TEST` samples (reshaped to 1x28x28, float32) and their
        labels on the class, then wraps a pre-built classifier and a freshly initialised
        single-output detector network into a `DetectorClassifier`.

        unittest invokes this hook on the class without arguments, so it has to be a classmethod.
        """
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')

        x_train = np.reshape(x_train, (x_train.shape[0], 1, 28, 28)).astype(np.float32)
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)

        cls.x_train = x_train[:NB_TRAIN]
        cls.y_train = y_train[:NB_TRAIN]
        cls.x_test = x_test[:NB_TEST]
        cls.y_test = y_test[:NB_TEST]

        # Define the internal classifier
        classifier = get_classifier_pt()

        # Define the internal detector: conv -> ReLU -> max-pool -> flatten -> linear (one output)
        conv = nn.Conv2d(1, 16, 5)
        # 2304 = 16 channels * 12 * 12 (28x28 input after the 5x5 convolution and 2x2 pooling)
        linear = nn.Linear(2304, 1)
        torch.nn.init.xavier_uniform_(conv.weight)
        torch.nn.init.xavier_uniform_(linear.weight)
        model = nn.Sequential(conv, nn.ReLU(), nn.MaxPool2d(2, 2), Flatten(), linear)
        model = Model(model)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)
        detector = PyTorchClassifier(model=model, loss=loss_fn, optimizer=optimizer, input_shape=(1, 28, 28),
                                     nb_classes=1, clip_values=(0, 1))

        # Define the detector-classifier
        cls.detector_classifier = DetectorClassifier(classifier=classifier, detector=detector)
Exemple #3
0
    def test_loss_gradient(self):
        """
        Check the loss gradients of the PyTorch classifier against stored reference values.

        Gradients are computed for `self.x_test` with labels `self.y_test`; two slices of the
        first sample's gradient are compared to the references up to 4 decimals.
        """
        grads = get_classifier_pt().loss_gradient(self.x_test, self.y_test)

        self.assertEqual(grads.shape, (NB_TEST, 1, 28, 28))

        # Reference for the slice [0, 0, :, 14]
        reference_fixed_last_axis = np.asarray([
            3.6839640e-05, 3.2549749e-05, 7.7749821e-05, 8.3091691e-05,
            -3.7349419e-05, 6.3347623e-05, 3.8059810e-05, 0.0000000e+00,
            0.0000000e+00, -8.7319646e-04, -9.1992842e-05, 7.8577449e-04,
            -3.5397310e-04, 7.8797276e-04, 1.6001392e-03, 1.9111208e-03,
            1.0337514e-03, 2.0264980e-04, 1.5017156e-03, 2.5167916e-04,
            -4.8513880e-06, -8.3324237e-04, 2.1826664e-04, 0.0000000e+00,
            0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00
        ])
        np.testing.assert_array_almost_equal(
            grads[0, 0, :, 14], reference_fixed_last_axis, decimal=4)

        # Reference for the slice [0, 0, 14, :]
        reference_fixed_third_axis = np.asarray([
            8.3541102e-04, 7.9759455e-04, 9.6892234e-04, 1.1802778e-03,
            -6.0561800e-04, -1.6849663e-03, 2.7197969e-04, 4.3571385e-05,
            1.2168724e-03, 4.9924687e-04, 4.7540435e-04, -3.6275905e-04,
            -1.1702902e-03, -7.0383825e-04, 1.6001392e-03, 6.1103603e-04,
            -5.1674922e-04, 1.6046617e-04, -6.3084543e-04, -2.0675475e-04,
            4.2173881e-04, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
            0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00
        ])
        np.testing.assert_array_almost_equal(
            grads[0, 0, 14, :], reference_fixed_third_axis, decimal=4)
    def test_pytorch_mnist(self):
        """
        Run the universal perturbation attack (NewtonFool as inner attacker) on the PyTorch classifier.

        Verifies the fooling rate (unless the attack did not converge), that the learned noise
        changes the test inputs and at least some predictions, and that `x_test` is not modified.

        :return:
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        x_test_backup = x_test.copy()

        # Classifier under attack
        classifier = get_classifier_pt()

        # Learn the perturbation on the training data
        attack = UniversalPerturbation(classifier, max_iter=1, attacker="newtonfool",
                                       attacker_params={"max_iter": 5})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # Apply the same noise to the test data
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        pred_train_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
        pred_test_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == pred_test_adv).all())
        self.assertFalse((np.argmax(y_train, axis=1) == pred_train_adv).all())

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
Exemple #5
0
    def test_pytorch_mnist(self):
        """
        Run the NewtonFool attack against the PyTorch classifier.

        Checks that the adversarial samples differ from the inputs, that the score of the
        originally predicted class satisfies `clean >= 0.9 * adversarial` for every sample, and
        that `x_test` is not modified.

        :return:
        """
        (_, _), (x_test, _) = self.mnist
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        x_test_backup = x_test.copy()

        classifier = get_classifier_pt()

        attack = NewtonFool(classifier, max_iter=5, batch_size=100)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())

        scores_clean = classifier.predict(x_test)
        scores_adv = classifier.predict(x_test_adv)

        # Boolean mask selecting, per sample, the position(s) of the highest clean score
        top_class_mask = scores_clean.max(axis=1, keepdims=True) == scores_clean
        top_scores_clean = scores_clean.max(axis=1)
        # Adversarial scores at the positions of the clean top class
        adv_scores_at_top = scores_adv[top_class_mask]
        self.assertTrue((top_scores_clean >= .9 * adv_scores_at_top).all())

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
Exemple #6
0
    def test_fit_generator(self):
        """
        Fit the PyTorch classifier through a `PyTorchDataGenerator` and check the accuracy.

        Accuracy on `self.x_test` is measured before and after two epochs of training on
        `self.x_train` / `self.y_train`, fed in batches of 5 through a shuffled `DataLoader`.
        """
        classifier = get_classifier_pt()

        def _test_accuracy():
            # Fraction of test samples whose predicted class matches the label
            predicted = np.argmax(classifier.predict(self.x_test), axis=1)
            expected = np.argmax(self.y_test, axis=1)
            return np.sum(predicted == expected) / NB_TEST

        accuracy = _test_accuracy()
        logger.info('Accuracy: %.2f%%', (accuracy * 100))

        # Wrap the training data into tensors
        inputs = torch.from_numpy(self.x_train).float()
        targets = torch.from_numpy(self.y_train)

        # Build the PyTorch dataset, the loader and the generator wrapper
        loader = DataLoader(dataset=torch.utils.data.TensorDataset(inputs, targets),
                            batch_size=5,
                            shuffle=True)
        data_gen = PyTorchDataGenerator(loader, size=NB_TRAIN, batch_size=5)

        # Train from the generator and re-evaluate
        classifier.fit_generator(data_gen, nb_epochs=2)
        accuracy_2 = _test_accuracy()
        logger.info('Accuracy: %.2f%%', (accuracy_2 * 100))

        self.assertEqual(accuracy, 0.4)
        self.assertAlmostEqual(accuracy_2, 0.75, delta=0.1)
Exemple #7
0
    def test_pytorch(self):
        """
        Generate an adversarial patch for the PyTorch classifier and compare it to reference values.

        Two individual patch entries and the sum over the whole patch are checked with a
        tolerance of 0.1.

        :return:
        """
        classifier = get_classifier_pt()

        n_samples = self.x_train.shape[0]
        x_train = np.reshape(self.x_train, (n_samples, 1, 28, 28)).astype(np.float32)

        attack_kwargs = dict(rotation_max=22.5,
                             scale_min=0.1,
                             scale_max=1.0,
                             learning_rate=5.0,
                             batch_size=10,
                             max_iter=500)
        attack = AdversarialPatch(classifier, **attack_kwargs)

        # generate() returns a pair; only the first element (the patch) is inspected
        patch, _ = attack.generate(x_train)

        self.assertAlmostEqual(patch[0, 8, 8], -3.143605902784875, delta=0.1)
        self.assertAlmostEqual(patch[0, 14, 14], 19.790434152473054, delta=0.1)
        self.assertAlmostEqual(float(np.sum(patch)), 383.068, delta=0.1)
Exemple #8
0
 def test_fit_predict(self):
     """Check that the PyTorch classifier reaches the expected accuracy on `self.x_test`."""
     scores = get_classifier_pt().predict(self.x_test)
     predicted = np.argmax(scores, axis=1)
     expected = np.argmax(self.y_test, axis=1)
     accuracy = np.sum(predicted == expected) / NB_TEST
     logger.info('Accuracy after fitting: %.2f%%', (accuracy * 100))
     self.assertEqual(accuracy, 0.4)
    def test_pytorch_mnist(self):
        """
        Run DeepFool against the PyTorch classifier (built with `from_logits=True`).

        Logs clean and adversarial accuracy on both splits, checks that the attack changes the
        inputs, that the adversarial one-hot predictions are not all equal to the labels, and
        that `x_test` itself is left untouched.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.reshape(x_train, (x_train.shape[0], 1, 28, 28)).astype(np.float32)
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)
        x_test_backup = x_test.copy()

        def _accuracy(one_hot_pred, one_hot_true):
            # Fraction of samples whose predicted class matches the labelled class
            hits = np.argmax(one_hot_pred, axis=1) == np.argmax(one_hot_true, axis=1)
            return np.sum(hits) / one_hot_true.shape[0]

        # Create basic PyTorch model
        classifier = get_classifier_pt(from_logits=True)

        clean_train_pred = get_labels_np_array(classifier.predict(x_train))
        logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%',
                    (_accuracy(clean_train_pred, y_train) * 100))

        clean_test_pred = get_labels_np_array(classifier.predict(x_test))
        logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%',
                    (_accuracy(clean_test_pred, y_test) * 100))

        # Craft adversarial samples for both splits
        attack = DeepFool(classifier, max_iter=5, batch_size=11)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        adv_train_pred = get_labels_np_array(classifier.predict(x_train_adv))
        adv_test_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == adv_train_pred).all())
        self.assertFalse((y_test == adv_test_pred).all())

        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    (_accuracy(adv_train_pred, y_train) * 100))
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (_accuracy(adv_test_pred, y_test) * 100))

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
    def test_pytorch_mnist(self):
        """
        Log the clean accuracy of the PyTorch classifier and run the shared backend test on it.

        The data is brought into the classifier's layout by swapping axes 1 and 3; the actual
        attack checks are delegated to `self._test_backend_mnist`.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        classifier = get_classifier_pt()

        def _accuracy(x, y):
            # Fraction of samples whose predicted class matches the labelled class
            one_hot_pred = get_labels_np_array(classifier.predict(x))
            matches = np.argmax(one_hot_pred, axis=1) == np.argmax(y, axis=1)
            return np.sum(matches) / y.shape[0]

        logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', (_accuracy(x_train, y_train) * 100))
        logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%', (_accuracy(x_test, y_test) * 100))

        self._test_backend_mnist(classifier, x_test, y_test)
Exemple #11
0
    def test_pytorch_mnist(self):
        """
        Run the boundary attack (targeted, then untargeted) against the PyTorch classifier.

        For both runs the adversarial samples must differ from the inputs and stay within
        [-0.0001, 1.0001]. The targeted run must hit at least one target class, the untargeted
        run must change at least one prediction. Finally `x_test` must be unmodified.

        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)
        x_test_backup = x_test.copy()

        classifier = get_classifier_pt()

        def _assert_changed_and_in_range(x_adv):
            # Adversarial samples differ from the inputs and stay (almost) inside [0, 1]
            self.assertFalse((x_test == x_adv).all())
            self.assertTrue((x_adv <= 1.0001).all())
            self.assertTrue((x_adv >= -0.0001).all())

        # Targeted attack with randomly drawn target classes
        attack = BoundaryAttack(classifier=classifier, targeted=True, max_iter=20)
        params = {'y': random_targets(y_test, classifier.nb_classes())}
        x_test_adv = attack.generate(x_test, **params)
        _assert_changed_and_in_range(x_test_adv)

        target_classes = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target_classes == adv_classes).any())

        # Untargeted attack
        attack = BoundaryAttack(classifier=classifier, targeted=False, max_iter=20)
        x_test_adv = attack.generate(x_test)
        _assert_changed_and_in_range(x_test_adv)

        clean_classes = np.argmax(classifier.predict(x_test), axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((clean_classes != adv_classes).any())

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
    def test_pytorch_mnist_L2(self):
        """
        Run the Carlini L2 attack (targeted, then untargeted) against the PyTorch classifier.

        The classifier is built with `from_logits=True`. Adversarial samples must stay in [0, 1];
        the success rate of each run is logged. Finally `x_test` must be unmodified.

        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)
        x_test_backup = x_test.copy()

        classifier = get_classifier_pt(from_logits=True)

        # Targeted attack with randomly drawn target classes
        attack = CarliniL2Method(classifier=classifier, targeted=True, max_iter=10)
        params = {'y': random_targets(y_test, classifier.nb_classes())}
        x_test_adv = attack.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target == adv_classes).any())
        logger.info('CW2 Success Rate: %.2f',
                    (sum(target == adv_classes) / float(len(target))))

        # Untargeted attack
        attack = CarliniL2Method(classifier=classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(x_test)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # NOTE(review): success is still measured against the random targets of the first run,
        # not against the clean predictions — confirm this is intended.
        target = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target != adv_classes).any())
        logger.info('CW2 Success Rate: %.2f',
                    (sum(target != adv_classes) / float(len(target))))

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
Exemple #13
0
    def test_pytorch_resume(self):
        """
        Check that resuming a HopSkipJump attack brings the adversarial sample closer to the input.

        The first test sample is attacked (targeted, initialised from the third test sample),
        then the attack is resumed twice from its previous result. The distance between the
        adversarial sample and the attacked sample must shrink.
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)

        # Only the first sample is attacked; distances are measured against it alone.
        # Subtracting the full `x_test` would broadcast the single adversarial sample against
        # every test sample and mix unrelated distances into the norm.
        x_attacked = x_test[0:1]

        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        # HSJ attack
        hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=10, max_eval=100, init_eval=10)

        params = {'y': y_test[2:3], 'x_adv_init': x_test[2:3]}
        x_test_adv1 = hsj.generate(x_attacked, **params)
        diff1 = np.linalg.norm(x_test_adv1 - x_attacked)

        # Resume twice, each time starting from the previous adversarial sample
        params.update(resume=True, x_adv_init=x_test_adv1)
        x_test_adv2 = hsj.generate(x_attacked, **params)
        params.update(x_adv_init=x_test_adv2)
        x_test_adv2 = hsj.generate(x_attacked, **params)
        diff2 = np.linalg.norm(x_test_adv2 - x_attacked)

        self.assertGreater(diff1, diff2)
    def test_pytorch_mnist(self):
        """
        Run the ElasticNet attack (targeted, then untargeted) against the PyTorch classifier.

        The classifier is built with `from_logits=False`. The targeted run is compared against a
        stored slice of adversarial values, the untargeted run against stored predicted classes.
        Adversarial samples must stay in [0, 1] and `x_test` must be unmodified.

        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)
        x_test_backup = x_test.copy()

        classifier = get_classifier_pt(from_logits=False)

        # Targeted attack with randomly drawn target classes
        attack = ElasticNet(classifier=classifier, targeted=True, max_iter=2)
        params = {'y': random_targets(y_test, classifier.nb_classes())}
        x_test_adv = attack.generate(x_test, **params)
        # Reference for the slice [2, 0, :, 14] of the adversarial samples
        reference_slice = np.asarray([
            0.01678124, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00665895, 0.0, 0.11374763,
            0.36250514, 0.5472948, 0.9308808, 1.0, 0.99920374, 0.86274165, 0.6346757,
            0.5597227, 0.24191494, 0.25882354, 0.0091916, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        ])
        np.testing.assert_array_almost_equal(x_test_adv[2, 0, :, 14], reference_slice, decimal=6)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target == adv_classes).any())

        # Untargeted attack; a fresh set of random labels is drawn and passed as `y`
        attack = ElasticNet(classifier=classifier, targeted=False, max_iter=2)
        params = {'y': random_targets(y_test, classifier.nb_classes())}
        x_test_adv = attack.generate(x_test, **params)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target != adv_classes).any())
        np.testing.assert_array_equal(adv_classes, np.asarray([7, 1, 1, 4, 4, 1, 4, 4, 4, 4]))

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
Exemple #15
0
    def test_pytorch_mnist(self):
        """
        Run the untargeted ZOO attack against the PyTorch classifier.

        The adversarial samples must stay in [0, 1] and `x_test` must not be modified by the
        attack or the classifier.

        :return:
        """
        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        # Get MNIST
        x_test = np.swapaxes(self.x_test, 1, 3).astype(np.float32)
        x_test_original = x_test.copy()

        # NOTE(review): the targeted variant below is disabled; the reason is not documented here.
        # First attack
        # zoo = ZooAttack(classifier=ptc, targeted=True, max_iter=10, binary_search_steps=10)
        # params = {'y': random_targets(self.y_test, ptc.nb_classes())}
        # x_test_adv = zoo.generate(x_test, **params)
        # self.assertFalse((x_test == x_test_adv).all())
        # self.assertLessEqual(np.amax(x_test_adv), 1.0)
        # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # target = np.argmax(params['y'], axis=1)
        # y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        # logger.debug('ZOO target: %s', target)
        # logger.debug('ZOO actual: %s', y_pred_adv)
        # logger.info('ZOO success rate on MNIST: %.2f', (sum(target != y_pred_adv) / float(len(target))))

        # Second attack
        zoo = ZooAttack(classifier=ptc, targeted=False, learning_rate=1e-2, max_iter=15, binary_search_steps=10,
                        abort_early=False, use_resize=False, use_importance=False)
        x_test_adv = zoo.generate(x_test)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
Exemple #16
0
    def test_ptclassifier(self):
        """
        Run the spatial transformation attack against the PyTorch classifier.

        The attack is fitted on the training data; one adversarial pixel, the fooling rate and
        the selected translation/rotation are compared to reference values. The attack is then
        applied to the test data, one pixel is checked, and `x_test` must be unmodified.

        :return:
        """
        n_train = self.x_train.shape[0]
        n_test = self.x_test.shape[0]
        x_train = np.reshape(self.x_train, (n_train, 1, 28, 28)).astype(np.float32)
        x_test = np.reshape(self.x_test, (n_test, 1, 28, 28)).astype(np.float32)
        x_test_backup = x_test.copy()

        classifier = get_classifier_pt(from_logits=True)

        attack = SpatialTransformation(classifier,
                                       max_translation=10.0,
                                       num_translations=3,
                                       max_rotation=30.0,
                                       num_rotations=3)
        x_train_adv = attack.generate(x_train)

        self.assertAlmostEqual(x_train_adv[0, 0, 13, 18], 0.627451, delta=0.01)
        self.assertAlmostEqual(attack.fooling_rate, 0.59, delta=0.01)

        # Transformation parameters selected by the attack
        self.assertEqual(attack.attack_trans_x, 0)
        self.assertEqual(attack.attack_trans_y, 3)
        self.assertEqual(attack.attack_rot, 0.0)

        x_test_adv = attack.generate(x_test)

        pixel_error = abs(x_test_adv[0, 0, 14, 14] - 0.008591662)
        self.assertLessEqual(pixel_error, 0.01)

        # The original test data must be unchanged by the attack and the classifier
        max_drift = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
    def test_pytorch_mnist_LInf(self):
        """
        Run the Carlini L-inf attack (targeted, then untargeted) against the PyTorch classifier.

        The classifier is built with `from_logits=True`. Adversarial samples must stay in [0, 1]
        up to a tolerance of 1e-6.

        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test = np.reshape(x_test, (x_test.shape[0], 1, 28, 28)).astype(np.float32)

        classifier = get_classifier_pt(from_logits=True)

        def _assert_in_range(x_adv):
            # Values may leave [0, 1] only by numerical noise
            self.assertLessEqual(np.amax(x_adv), 1.0 + 1e-6)
            self.assertGreaterEqual(np.amin(x_adv), -1e-6)

        # Targeted attack with randomly drawn target classes
        attack = CarliniLInfMethod(classifier=classifier, targeted=True, max_iter=10, eps=0.5)
        params = {'y': random_targets(y_test, classifier.nb_classes())}
        x_test_adv = attack.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        _assert_in_range(x_test_adv)
        target = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target == adv_classes).any())

        # Untargeted attack
        attack = CarliniLInfMethod(classifier=classifier, targeted=False, max_iter=10, eps=0.5)
        x_test_adv = attack.generate(x_test)
        _assert_in_range(x_test_adv)

        # NOTE(review): compared against the random targets of the first run, not against the
        # clean predictions — confirm this is intended.
        target = np.argmax(params['y'], axis=1)
        adv_classes = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((target != adv_classes).any())
Exemple #18
0
    def test_ptclassifier(self):
        """
        Extract a copy of the PyTorch victim classifier with CopycatCNN and check its fidelity.

        A small, untrained network is handed to the attack as the thieved classifier. After
        extraction, the predictions of victim and thief on the first 100 training samples must
        agree in more than 30% of the cases.

        :return:
        """
        # Build PyTorchClassifier
        victim_ptc = get_classifier_pt()

        class Model(nn.Module):
            """
            Create model for pytorch: convolution, ReLU, max-pooling and one linear layer.
            """
            def __init__(self):
                super(Model, self).__init__()

                self.conv = nn.Conv2d(in_channels=1,
                                      out_channels=1,
                                      kernel_size=7)
                self.pool = nn.MaxPool2d(4, 4)
                self.fullyconnected = nn.Linear(25, 10)

            # pylint: disable=W0221
            # disable pylint because of API requirements for function
            def forward(self, x):
                """
                Forward function to evaluate the model

                :param x: Input to the model
                :return: Prediction of the model
                """
                x = self.conv(x)
                x = torch.nn.functional.relu(x)
                x = self.pool(x)
                x = x.reshape(-1, 25)
                x = self.fullyconnected(x)
                # The tensor is 2-D here; dim=1 is what the deprecated implicit choice resolved
                # to, so the output is unchanged but the deprecation warning is avoided.
                x = torch.nn.functional.softmax(x, dim=1)

                return x

        # Define the network
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Get classifier
        thieved_ptc = PyTorchClassifier(model=model,
                                        loss=loss_fn,
                                        optimizer=optimizer,
                                        input_shape=(1, 28, 28),
                                        nb_classes=10,
                                        clip_values=(0, 1))

        # Create attack
        copycat_cnn = CopycatCNN(classifier=victim_ptc,
                                 batch_size_fit=BATCH_SIZE,
                                 batch_size_query=BATCH_SIZE,
                                 nb_epochs=NB_EPOCHS,
                                 nb_stolen=NB_STOLEN)

        # Work on a local axis-swapped array instead of temporarily overwriting `self.x_train`,
        # so the shared data stays intact even if the extraction or a prediction raises.
        x_train = np.swapaxes(self.x_train, 1, 3)
        thieved_ptc = copycat_cnn.extract(x=x_train,
                                          thieved_classifier=thieved_ptc)
        victim_preds = np.argmax(victim_ptc.predict(x=x_train[:100]),
                                 axis=1)
        thieved_preds = np.argmax(thieved_ptc.predict(x=x_train[:100]),
                                  axis=1)

        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)
Exemple #19
0
 def test_nb_classes(self):
     """The PyTorch test classifier must report ten classes."""
     reported = get_classifier_pt().nb_classes()
     self.assertEqual(reported, 10)
Exemple #20
0
 def test_input_shape(self):
     """The PyTorch test classifier must report an input shape of (1, 28, 28)."""
     reported = get_classifier_pt().input_shape
     self.assertEqual(reported, (1, 28, 28))
Exemple #21
0
 def test_pytorch_mnist_targeted(self):
     """Run the shared targeted-attack check on the PyTorch classifier with axis-swapped test data."""
     (_, _), (raw_x_test, _) = self.mnist
     classifier = get_classifier_pt()
     # Swap axes 1 and 3 and cast to float32 before handing the data to the classifier
     swapped_x_test = np.swapaxes(raw_x_test, 1, 3).astype(np.float32)
     self._test_mnist_targeted(classifier, swapped_x_test)
Exemple #22
0
    def test_class_gradient(self):
        """
        Check `class_gradient` of the PyTorch classifier for three kinds of `label` argument.

        The method is called without a label, with the integer label 5 and with an array of
        labels. Each time the shape of the result is checked and two slices of the first
        sample's gradient are compared against stored reference values (4 decimals).
        """
        classifier = get_classifier_pt()

        def _check_slices(grads, class_index, ref_fixed_last_axis, ref_fixed_fourth_axis):
            # Compare the slices [0, class_index, 0, :, 14] and [0, class_index, 0, 14, :]
            np.testing.assert_array_almost_equal(
                grads[0, class_index, 0, :, 14], ref_fixed_last_axis, decimal=4)
            np.testing.assert_array_almost_equal(
                grads[0, class_index, 0, 14, :], ref_fixed_fourth_axis, decimal=4)

        # References shared by the "all labels" case (read at class index 5) and the label=5 case
        ref_class5_fixed_last_axis = np.asarray([
            -0.00367321, -0.0002892, 0.00037825, -0.00053344, 0.00192121,
            0.00112047, 0.0023135, 0.0, 0.0, -0.00391743, -0.0002264,
            0.00238103, -0.00073711, 0.00270405, 0.00389043, 0.00440818,
            -0.00412769, -0.00441795, 0.00081916, -0.00091284, 0.00119645,
            -0.00849089, 0.00547925, 0.0, 0.0, 0.0, 0.0, 0.0
        ])
        ref_class5_fixed_fourth_axis = np.asarray([
            -1.0557442e-03, -1.0079540e-03, -7.7426381e-04, 1.7387437e-03,
            2.1773505e-03, 5.0880131e-05, 1.6497375e-03, 2.6113102e-03,
            6.0904315e-03, 4.1080985e-04, 2.5268074e-03, -3.6661496e-04,
            -3.0568994e-03, -1.1665225e-03, 3.8904310e-03, 3.1726388e-04,
            1.3203262e-03, -1.1720933e-04, -1.4315107e-03, -4.7676827e-04,
            9.7251305e-04, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
            0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00
        ])

        # Test all gradients label = None
        grads = classifier.class_gradient(self.x_test)
        self.assertEqual(grads.shape, (NB_TEST, 10, 1, 28, 28))
        _check_slices(grads, 5, ref_class5_fixed_last_axis, ref_class5_fixed_fourth_axis)

        # Test 1 gradient label = 5
        grads = classifier.class_gradient(self.x_test, label=5)
        self.assertEqual(grads.shape, (NB_TEST, 1, 1, 28, 28))
        _check_slices(grads, 0, ref_class5_fixed_last_axis, ref_class5_fixed_fourth_axis)

        # Test a set of gradients label = array
        # NOTE(review): the labels are drawn at random but compared against fixed references;
        # this presumably relies on the random generator being seeded elsewhere — confirm.
        label = np.random.randint(5, size=NB_TEST)
        grads = classifier.class_gradient(self.x_test, label=label)
        self.assertEqual(grads.shape, (NB_TEST, 1, 1, 28, 28))

        ref_array_fixed_last_axis = np.asarray([
            -0.00195835, -0.00134457, -0.00307221, -0.00340564, 0.00175022,
            -0.00239714, -0.00122619, 0.0, 0.0, -0.00520899, -0.00046105,
            0.00414874, -0.00171095, 0.00429184, 0.0075138, 0.00792443,
            0.0019566, 0.00035517, 0.00504575, -0.00037397, 0.00022343,
            -0.00530035, 0.0020528, 0.0, 0.0, 0.0, 0.0, 0.0
        ])
        ref_array_fixed_fourth_axis = np.asarray([
            5.0867130e-03, 4.8564533e-03, 6.1040395e-03, 8.6531248e-03,
            -6.0958802e-03, -1.4114541e-02, -7.1085966e-04, -5.0330797e-04,
            1.2943064e-02, 8.2416134e-03, -1.9859453e-04, -9.8110031e-05,
            -3.8902226e-03, -1.2945874e-03, 7.5138002e-03, 1.7720887e-03,
            3.1399354e-04, 2.3657191e-04, -3.0891625e-03, -1.0211228e-03,
            2.0828887e-03, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
            0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00
        ])
        _check_slices(grads, 0, ref_array_fixed_last_axis, ref_array_fixed_fourth_axis)
Exemple #23
0
    def test_pytorch_mnist(self):
        """
        Run the SaliencyMapMethod attack, targeted and then untargeted, against the PyTorch test classifier on the
        MNIST samples held in `self.mnist`, and check that the inputs are left untouched.

        :return:
        """
        def _accuracy(predictions, labels):
            # Fraction of rows whose arg-max class agrees between `predictions` and `labels`.
            return np.sum(np.argmax(predictions, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

        (x_train, y_train), (x_test, y_test) = self.mnist
        # NOTE(review): swapping axes 1 and 3 presumably moves channels-last data to channels-first - confirm
        # against the layout stored in self.mnist.
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        # Snapshot used at the end to verify that neither the attack nor the classifier mutated x_test.
        x_test_original = x_test.copy()

        # Create basic PyTorch model
        classifier = get_classifier_pt()

        scores = get_labels_np_array(classifier.predict(x_train))
        logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%',
                    (_accuracy(scores, y_train) * 100))

        scores = get_labels_np_array(classifier.predict(x_test))
        logger.info('\n[PyTorch, MNIST] Accuracy on test set: %.2f%%',
                    (_accuracy(scores, y_test) * 100))

        # targeted
        # Generate random target classes that differ from the true class of every sample.
        true_labels = np.argmax(y_test, axis=1)
        nb_classes = np.unique(true_labels).shape[0]
        targets = np.random.randint(nb_classes, size=NB_TEST)
        # Redraw only the entries that collide with their true label. Redrawing the whole vector until it is
        # collision-free needs a number of attempts that grows exponentially with the number of samples.
        collisions = targets == true_labels
        while collisions.any():
            targets[collisions] = np.random.randint(nb_classes, size=int(np.sum(collisions)))
            collisions = targets == true_labels

        # Perform attack
        attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_adv = attack.generate(x_test, y=to_categorical(targets, nb_classes))

        # The adversarial batch must differ from the clean batch and must not be all zeros.
        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        logger.info('Accuracy on adversarial examples: %.2f%%',
                    (_accuracy(y_pred, y_test) * 100))

        # untargeted
        attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertFalse((0. == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        logger.info('Accuracy on adversarial examples: %.2f%%',
                    (_accuracy(y_pred, y_test) * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Exemple #24
0
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier: run HopSkipJump in targeted and untargeted mode, each once with the
        attack's default norm and once with `norm=np.inf`, and check that the inputs are left untouched.

        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        # NOTE(review): swapping axes 1 and 3 presumably moves channels-last data to channels-first - confirm
        # against the layout stored in self.mnist.
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        # Snapshot used at the end to verify that neither the attack nor the classifier mutated x_test.
        x_test_original = x_test.copy()

        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        # Targeted attack, default norm (described as norm=2 by the original note - confirm against HopSkipJump)
        hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10)
        params = {'y': random_targets(y_test, ptc.nb_classes())}
        x_test_adv = hsj.generate(x_test, **params)

        # Adversarial samples must differ from the originals and stay within [0, 1] up to a small tolerance.
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        # At least one adversarial sample has to be classified as its requested target.
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Targeted attack, norm=np.inf
        # `np.inf` is used instead of the `np.Inf` alias, which was removed in NumPy 2.0.
        hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
        params = {'y': random_targets(y_test, ptc.nb_classes())}
        x_test_adv = hsj.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Untargeted attack, default norm
        hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10)
        x_test_adv = hsj.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        # At least one prediction has to change between the clean and the adversarial samples.
        y_pred = np.argmax(ptc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Untargeted attack, norm=np.inf
        hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
        x_test_adv = hsj.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(ptc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
Exemple #25
0
 def test_pytorch_mnist(self):
     """
     Run the shared MNIST backend test with the classifier returned by `get_classifier_pt`.

     :return:
     """
     (_, _), (images, labels) = self.mnist
     # Reshape the test images to (N, 1, 28, 28) and cast them to float32 before running the backend test.
     batch_shape = (images.shape[0], 1, 28, 28)
     images = np.reshape(images, batch_shape).astype(np.float32)
     model = get_classifier_pt()
     self._test_backend_mnist(model, images, labels)