Example #1
    def test_subsetscan_detector(self):
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset("mnist")
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier = get_image_classifier_kr()

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.5)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Compile training data for detector:
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)

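        # Background data (bgd), clean test data and anomalous (adversarial) test data: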
        bgd = x_train
        clean = x_test
        anom = x_test_adv

        detector = SubsetScanningDetector(classifier, bgd, layer=1)

        _, _, dpwr = detector.scan(clean, clean)
        self.assertAlmostEqual(dpwr, 0.5)

        _, _, dpwr = detector.scan(clean, anom)
        self.assertGreater(dpwr, 0.5)

        _, _, dpwr = detector.scan(clean, x_train_detector, 85, 15)
        self.assertGreater(dpwr, 0.5)
Example #2
    def test_without_defences(self):
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the pre-trained Keras model and wrap it in the query-efficient gradient estimation wrapper
        classifier = QueryEfficientBBGradientEstimation(self.classifier_k,
                                                        20,
                                                        1 / 64.,
                                                        round_samples=1 / 255.)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_train, axis=1)) / y_train.shape[0]
        logger.info(
            'Accuracy on adversarial train examples with limited query info: %.2f%%',
            (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on adversarial test examples with limited query info: %.2f%%',
            (acc * 100))
Example #3
    def test_2_pt(self):
        """
        Test with a PyTorch Classifier.
        :return:
        """
        # Build PyTorchClassifier
        ptc = get_image_classifier_pt()

        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        x_test = x_test.transpose(0, 3, 1, 2).astype(np.float32)

        # First FGSM attack:
        fgsm = FastGradientMethod(estimator=ptc, targeted=True)
        params = {"y": random_targets(y_test, ptc.nb_classes)}
        x_test_adv = fgsm.generate(x_test, **params)

        # Initialize RS object and attack with FGSM
        rs = PyTorchRandomizedSmoothing(
            model=ptc.model,
            loss=ptc._loss,
            optimizer=torch.optim.Adam(ptc.model.parameters(), lr=0.01),
            input_shape=ptc.input_shape,
            nb_classes=ptc.nb_classes,
            channels_first=ptc.channels_first,
            clip_values=ptc.clip_values,
            sample_size=100,
            scale=0.01,
            alpha=0.001,
        )
        fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
        x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

        # Compare results
        # check shapes are equal and values are within a certain range
        self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
        self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

        # Check basic functionality of RS object
        # check predict
        y_test_smooth = rs.predict(x=x_test)
        y_test_base = ptc.predict(x=x_test)
        self.assertEqual(y_test_smooth.shape, y_test.shape)
        self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
        self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

        # check certification
        pred, radius = rs.certify(x=x_test, n=250)
        self.assertEqual(len(pred), NB_TEST)
        self.assertEqual(len(radius), NB_TEST)
        self.assertTrue((radius <= 1).all())
        self.assertTrue((pred < y_test.shape[1]).all())

        # loss gradient
        grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
        assert grad.shape == (10, 1, 28, 28)

        # fit
        rs.fit(x=x_test, y=y_test)
Example #4
    def test_krclassifier(self):
        """
        Test with a KerasClassifier.
        :return:
        """
        # Build KerasClassifier
        krc = get_image_classifier_kr()

        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # First attack (without EoT):
        fgsm = FastGradientMethod(estimator=krc, targeted=True)
        params = {"y": random_targets(y_test, krc.nb_classes)}
        x_test_adv = fgsm.generate(x_test, **params)

        # Second attack (with EoT):
        def t(x):
            return x

        def transformation():
            while True:
                yield t

        eot = ExpectationOverTransformations(classifier=krc,
                                             sample_size=1,
                                             transformation=transformation)
        fgsm_with_eot = FastGradientMethod(estimator=eot, targeted=True)
        x_test_adv_with_eot = fgsm_with_eot.generate(x_test, **params)

        self.assertTrue(
            (np.abs(x_test_adv - x_test_adv_with_eot) < 0.001).all())
Example #5
    def test_with_defences(self):
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the trained Keras model
        model = self.classifier_k._model
        fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
        classifier = KerasClassifier(model=model,
                                     clip_values=(0, 1),
                                     preprocessing_defences=fs)

        # Wrap the classifier in the query-efficient gradient estimator
        classifier = QueryEfficientGradientEstimationClassifier(
            classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())
Example #6
    def test_3_kr(self):
        """
        Test with a Keras Classifier.
        :return:
        """
        # Build KerasClassifier
        classifier = get_image_classifier_kr()

        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # First FGSM attack:
        fgsm = FastGradientMethod(estimator=classifier, targeted=True)
        params = {"y": random_targets(y_test, classifier.nb_classes)}
        x_test_adv = fgsm.generate(x_test, **params)

        # Initialize RS object and attack with FGSM
        rs = NumpyRandomizedSmoothing(
            classifier=classifier,
            sample_size=100,
            scale=0.01,
            alpha=0.001,
        )
        fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
        x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

        # Compare results
        # check shapes are equal and values are within a certain range
        self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
        self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

        # Check basic functionality of RS object
        # check predict
        y_test_smooth = rs.predict(x=x_test)
        y_test_base = classifier.predict(x=x_test)
        self.assertEqual(y_test_smooth.shape, y_test.shape)
        self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
        self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

        # check certification
        pred, radius = rs.certify(x=x_test, n=250)
        self.assertEqual(len(pred), NB_TEST)
        self.assertEqual(len(radius), NB_TEST)
        self.assertTrue((radius <= 1).all())
        self.assertTrue((pred < y_test.shape[1]).all())

        # loss gradient
        grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
        assert grad.shape == (10, 28, 28, 1)

        # fit
        rs.fit(x=x_test, y=y_test)
Example #7
    def test_binary_activation_detector(self):
        """
        Test the binary activation detector end-to-end.
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier = get_image_classifier_kr()

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Compile training data for detector:
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)
        y_train_detector = np.concatenate((np.array([[1, 0]] * NB_TRAIN), np.array([[0, 1]] * NB_TRAIN)), axis=0)

        # Create a simple CNN for the detector
        activation_shape = classifier.get_activations(x_test[:1], 0, batch_size=128).shape[1:]
        number_outputs = 2
        model = Sequential()
        model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
        model.add(Flatten())
        model.add(Dense(number_outputs, activation="softmax"))
        model.compile(
            loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"]
        )

        # Create the detector and train it.
        # The detector considers activations at layer=0:
        detector = BinaryActivationDetector(
            classifier=classifier, detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False), layer=0
        )
        detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        logger.debug("Number of true positives detected: %i", nb_true_positives)
        logger.debug("Number of true negatives detected: %i", nb_true_negatives)
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
Example #8
    def test_two_attacks(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        attack1 = FastGradientMethod(estimator=self.classifier, batch_size=16)
        attack2 = DeepFool(classifier=self.classifier,
                           max_iter=5,
                           batch_size=16)
        x_test_adv = attack1.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier,
                                         attacks=[attack1, attack2])
        adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.36)
        self.assertEqual(accuracy, 0.13)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Example #9
def _fgsm(model, data, labels, attack_args):
    """
    Fast Gradient Sign Method
    Explaining and Harnessing Adversarial Examples
    by Ian J. Goodfellow, Jonathon Shlens, Christian Szegedy
    ``https://arxiv.org/abs/1412.6572``
    :param model:
    :param data:
    :param labels:
    :param attack_args:
    :return:
    """
    print('>>> Generating FGSM examples.')
    eps = attack_args.get('eps', 0.3)

    targeted = attack_args.get('targeted', False)
    num_random_init = attack_args.get('num_random_init', 0)
    minimal = attack_args.get('minimal', False)

    attacker = FastGradientMethod(model,
                                  eps=eps,
                                  eps_step=eps,
                                  targeted=targeted,
                                  num_random_init=num_random_init,
                                  minimal=minimal)
    return attacker.generate(data, labels)
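For context, a minimal usage sketch of the helper above; `classifier`, `x_test` and `y_test` are hypothetical stand-ins for a fitted ART classifier and NumPy test arrays:

# Hypothetical usage sketch: `classifier` is any fitted ART classifier; the
# attack_args keys mirror the .get() calls inside _fgsm.
attack_args = {'eps': 0.1, 'targeted': False, 'num_random_init': 0, 'minimal': True}
x_test_adv = _fgsm(classifier, x_test, y_test, attack_args)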
Example #10
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        def t(x):
            return x

        def transformation():
            while True:
                yield t

        classifier = get_tabular_classifier_kr()
        classifier = ExpectationOverTransformations(
            classifier, sample_size=1, transformation=transformation)

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info("Accuracy on Iris with limited query info: %.2f%%",
                    (acc * 100))
Example #11
    def test_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_tabular_classifier_kr()

        def t(x):
            return x

        def transformation():
            while True:
                yield t

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channels_first=True)
        classifier = ExpectationOverTransformations(
            classifier, sample_size=1, transformation=transformation)
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info("Accuracy on Iris with limited query info: %.2f%%",
                    (acc * 100))
Example #12
    def test_without_defences(self):
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the pre-trained Keras model and wrap it in the query-efficient gradient estimator
        classifier = QueryEfficientGradientEstimationClassifier(
            self.classifier_k, 20, 1 / 64.0, round_samples=1 / 255.0)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())
Example #13
    def test_with_defences(self):
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the pre-trained Keras model
        model = self.classifier_k._model
        fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
        classifier = KerasClassifier(model=model,
                                     clip_values=(0, 1),
                                     preprocessing_defences=fs)
        # Wrap the classifier
        classifier = QueryEfficientBBGradientEstimation(
            classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_train, axis=1)) / y_train.shape[0]
        logger.info(
            "Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%",
            (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]
        logger.info(
            "Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%",
            (acc * 100))
Example #14
    def test_two_attacks_with_generator(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_original = x_train.copy()
        x_test_original = x_test.copy()

        class MyDataGenerator(DataGenerator):
            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x = x
                self.y = y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                ids = np.random.choice(self.size,
                                       size=min(self.size, self.batch_size),
                                       replace=False)
                return self.x[ids], self.y[ids]

        generator = MyDataGenerator(x_train,
                                    y_train,
                                    size=x_train.shape[0],
                                    batch_size=16)

        attack1 = FastGradientMethod(estimator=self.classifier, batch_size=16)
        attack2 = DeepFool(classifier=self.classifier,
                           max_iter=5,
                           batch_size=16)
        x_test_adv = attack1.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier,
                                         attacks=[attack1, attack2])
        adv_trainer.fit_generator(generator, nb_epochs=3)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
        self.assertEqual(accuracy, 0.11)

        # Check that x_train and x_test have not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_train_original -
                                                   x_train))),
                               0.0,
                               delta=0.00001)
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Example #15
    def test_fit_predict_different_classifiers(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        attack = FastGradientMethod(self.classifier)
        x_test_adv = attack.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier_2, attack)
        adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.32)
        self.assertEqual(accuracy, 0.13)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)

        # fit_generator
        class MyDataGenerator(DataGenerator):
            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x = x
                self.y = y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                ids = np.random.choice(self.size,
                                       size=min(self.size, self.batch_size),
                                       replace=False)
                return self.x[ids], self.y[ids]

        generator = MyDataGenerator(x_train,
                                    y_train,
                                    size=x_train.shape[0],
                                    batch_size=16)
        adv_trainer.fit_generator(generator, nb_epochs=5)
        adv_trainer_2 = AdversarialTrainer(self.classifier_2,
                                           attack,
                                           ratio=1.0)
        adv_trainer_2.fit_generator(generator, nb_epochs=5)
Example #16
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        classifier = get_tabular_classifier_kr()
        classifier = QueryEfficientGradientEstimationClassifier(
            classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
Example #17
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        ptc = get_tabular_classifier_pt()
        rs = PyTorchRandomizedSmoothing(
            model=ptc.model,
            loss=ptc._loss,
            input_shape=ptc.input_shape,
            nb_classes=ptc.nb_classes,
            channels_first=ptc.channels_first,
            clip_values=ptc.clip_values,
            sample_size=100,
            scale=0.01,
            alpha=0.001,
        )

        # Test untargeted attack
        attack = FastGradientMethod(ptc, eps=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

        pred = rs.predict(x_test)
        pred2 = rs.predict(x_test_adv)
        acc, cov = compute_accuracy(pred, y_test)
        acc2, cov2 = compute_accuracy(pred2, y_test)
        logger.info("Accuracy on Iris with smoothing on adversarial examples: %.2f%%", (acc * 100))
        logger.info("Coverage on Iris with smoothing on adversarial examples: %.2f%%", (cov * 100))
        logger.info("Accuracy on Iris with smoothing: %.2f%%", (acc2 * 100))
        logger.info("Coverage on Iris with smoothing: %.2f%%", (cov2 * 100))

        # Check basic functionality of RS object
        # check predict
        y_test_smooth = rs.predict(x=x_test)
        self.assertEqual(y_test_smooth.shape, y_test.shape)
        self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

        # check certification
        pred, radius = rs.certify(x=x_test, n=250)
        self.assertEqual(len(pred), len(x_test))
        self.assertEqual(len(radius), len(x_test))
        self.assertTrue((radius <= 1).all())
        self.assertTrue((pred < y_test.shape[1]).all())
Example #18
    def test_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channels_first=True)
        classifier = QueryEfficientGradientEstimationClassifier(
            classifier, 20, 1 / 64.0, round_samples=1 / 255.0)
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
Example #19
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        classifier = get_iris_classifier_kr()
        classifier = QueryEfficientBBGradientEstimation(classifier,
                                                        20,
                                                        1 / 64.,
                                                        round_samples=1 / 255.)

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%',
                    (acc * 100))
Example #20
    def test_fit_predict(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        attack = FastGradientMethod(self.classifier)
        x_test_adv = attack.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier, attack)
        adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.12)
        self.assertEqual(accuracy, 0.13)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Example #21
    def test_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        classifier = QueryEfficientBBGradientEstimation(classifier,
                                                        20,
                                                        1 / 64.,
                                                        round_samples=1 / 255.)
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%',
                    (acc * 100))
Example #22
def robustness_evaluation(object_storage_url,
                          object_storage_username,
                          object_storage_password,
                          data_bucket_name,
                          result_bucket_name,
                          model_id,
                          feature_testset_path='processed_data/X_test.npy',
                          label_testset_path='processed_data/y_test.npy',
                          clip_values=(0, 1),
                          nb_classes=2,
                          input_shape=(1, 3, 64, 64),
                          model_class_file='model.py',
                          model_class_name='model',
                          LossFn='',
                          Optimizer='',
                          epsilon=0.2):

    url = re.compile(r"https?://")
    cos = Minio(url.sub('', object_storage_url),
                access_key=object_storage_username,
                secret_key=object_storage_password,
                secure=False)

    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + '/_submitted_code/model.zip'

    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + '/' + weights_filename,
                    weights_filename)
    cos.fget_object(result_bucket_name, model_files, 'model.zip')

    # Load PyTorch model definition from the source code.
    zip_ref = zipfile.ZipFile('model.zip', 'r')
    zip_ref.extractall('model_files')
    zip_ref.close()

    modulename = 'model_files.' + model_class_file.split('.')[0].replace(
        '-', '_')
    # We require users to define where the model class is located, or to
    # follow the naming convention we have provided.
    model_class = getattr(importlib.import_module(modulename),
                          model_class_name)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))

    # Define Loss and optimizer function for the PyTorch model
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Create the PyTorch classifier
    classifier = PyTorchClassifier(model=model,
                                   loss=loss_fn,
                                   optimizer=optimizer,
                                   input_shape=input_shape,
                                   nb_classes=nb_classes,
                                   clip_values=clip_values)

    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)
    return metrics
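For illustration, a minimal invocation sketch; every connection value below (endpoint, credentials, bucket names, model_id) is a hypothetical placeholder, and the remaining parameters fall back to their defaults:

# Hypothetical invocation: all storage values below are placeholders.
metrics = robustness_evaluation(
    object_storage_url='http://minio.example.com:9000',
    object_storage_username='minio-user',
    object_storage_password='minio-pass',
    data_bucket_name='data-bucket',
    result_bucket_name='result-bucket',
    model_id='my-model-001',
    epsilon=0.2)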
Example #23
    def test_binary_input_detector(self):
        """
        Test the binary input detector end-to-end.
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier = get_image_classifier_kr()

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Compile training data for detector:
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)
        y_train_detector = np.concatenate(
            (np.array([[1, 0]] * NB_TRAIN), np.array([[0, 1]] * NB_TRAIN)),
            axis=0)

        # Create a simple CNN for the detector
        input_shape = x_train.shape[1:]
        try:
            from keras.optimizers import Adam

            optimizer = Adam(lr=0.01)
        except ImportError:
            from keras.optimizers import adam_v2

            optimizer = adam_v2.Adam(lr=0.01)
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation="relu",
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(2, activation="softmax"))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=optimizer,
                      metrics=["accuracy"])

        # Create detector and train it:
        detector = BinaryInputDetector(
            KerasClassifier(model=model, clip_values=(0, 1), use_logits=False))
        detector.fit(x_train_detector,
                     y_train_detector,
                     nb_epochs=2,
                     batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative:
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        logger.debug("Number of true positives detected: %i",
                     nb_true_positives)
        logger.debug("Number of true negatives detected: %i",
                     nb_true_negatives)
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
Example #24
    def test_1_tf(self):
        """
        Test with a TensorFlow Classifier.
        :return:
        """
        tf_version = list(map(int, tf.__version__.lower().split("+")[0].split(".")))
        if tf_version[0] == 2:

            # Build TensorFlowV2Classifier
            classifier, _ = get_image_classifier_tf()

            # Get MNIST
            (_, _), (x_test, y_test) = self.mnist

            # First FGSM attack:
            fgsm = FastGradientMethod(estimator=classifier, targeted=True)
            params = {"y": random_targets(y_test, classifier.nb_classes)}
            x_test_adv = fgsm.generate(x_test, **params)

            # Initialize RS object and attack with FGSM
            rs = TensorFlowV2RandomizedSmoothing(
                model=classifier.model,
                nb_classes=classifier.nb_classes,
                input_shape=classifier.input_shape,
                loss_object=classifier.loss_object,
                train_step=classifier.train_step,
                channels_first=classifier.channels_first,
                clip_values=classifier.clip_values,
                preprocessing_defences=classifier.preprocessing_defences,
                postprocessing_defences=classifier.postprocessing_defences,
                preprocessing=classifier.preprocessing,
                sample_size=100,
                scale=0.01,
                alpha=0.001,
            )
            fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
            x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

            # Compare results
            # check shapes are equal and values are within a certain range
            self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
            self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

            # Check basic functionality of RS object
            # check predict
            y_test_smooth = rs.predict(x=x_test)
            y_test_base = classifier.predict(x=x_test)
            self.assertEqual(y_test_smooth.shape, y_test.shape)
            self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
            self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

            # check certification
            pred, radius = rs.certify(x=x_test, n=250)
            self.assertEqual(len(pred), NB_TEST)
            self.assertEqual(len(radius), NB_TEST)
            self.assertTrue((radius <= 1).all())
            self.assertTrue((pred < y_test.shape[1]).all())

            # loss gradient
            grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
            assert grad.shape == (10, 28, 28, 1)

            # fit
            rs.fit(x=x_test, y=y_test)