コード例 #1
0
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        def t(x):
            return x

        def transformation():
            while True:
                yield t

        classifier = get_iris_classifier_kr()
        classifier = ExpectationOverTransformations(
            classifier, sample_size=1, transformation=transformation)

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%',
                    (acc * 100))
コード例 #2
0
    def test_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        def t(x):
            return x

        def transformation():
            while True:
                yield t

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        classifier = ExpectationOverTransformations(
            classifier, sample_size=1, transformation=transformation)
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%',
                    (acc * 100))
    def test_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        krc = KerasClassifier(model=classifier._model,
                              use_logits=False,
                              channel_index=1)
        rs = RandomizedSmoothing(classifier=krc,
                                 sample_size=100,
                                 scale=0.01,
                                 alpha=0.001)
        attack = FastGradientMethod(rs, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

        pred = rs.predict(x_test)
        pred2 = rs.predict(x_test_adv)
        acc, cov = compute_accuracy(pred, y_test)
        acc2, cov2 = compute_accuracy(pred2, y_test)
        logger.info(
            'Accuracy on Iris with smoothing on adversarial examples: %.2f%%',
            (acc * 100))
        logger.info(
            'Coverage on Iris with smoothing on adversarial examples: %.2f%%',
            (cov * 100))
        logger.info('Accuracy on Iris with smoothing: %.2f%%', (acc2 * 100))
        logger.info('Coverage on Iris with smoothing: %.2f%%', (cov2 * 100))
コード例 #4
0
    def test_keras_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)

        # Norm=2
        attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

        # Norm=np.inf
        attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.Inf)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

        # Clean-up session
        k.clear_session()
コード例 #5
0
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == predictions_adv).all())
        accuracy = np.sum(
            predictions_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%',
                    (accuracy * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = FastGradientMethod(classifier, targeted=True, eps=.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == predictions_adv).any())
        accuracy = np.sum(
            predictions_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted FGM on Iris: %.2f%%',
                    (accuracy * 100))
コード例 #6
0
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Norm=2
        attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

        # Norm=np.inf
        attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.Inf)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

        # Clean-up session
        k.clear_session()
コード例 #7
0
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = ProjectedGradientDescent(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted PGD on Iris: %.2f%%',
                    (acc * 100))
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        krc = get_iris_classifier_kr()
        rs = RandomizedSmoothing(classifier=krc,
                                 sample_size=100,
                                 scale=0.01,
                                 alpha=0.001)

        # Test untargeted attack
        attack = FastGradientMethod(krc, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

        pred = rs.predict(x_test)
        pred2 = rs.predict(x_test_adv)
        acc, cov = compute_accuracy(pred, y_test)
        acc2, cov2 = compute_accuracy(pred2, y_test)
        logger.info(
            'Accuracy on Iris with smoothing on adversarial examples: %.2f%%',
            (acc * 100))
        logger.info(
            'Coverage on Iris with smoothing on adversarial examples: %.2f%%',
            (cov * 100))
        logger.info('Accuracy on Iris with smoothing: %.2f%%', (acc2 * 100))
        logger.info('Coverage on Iris with smoothing: %.2f%%', (cov2 * 100))

        # Check basic functionality of RS object
        # check predict
        y_test_smooth = rs.predict(x=x_test)
        self.assertEqual(y_test_smooth.shape, y_test.shape)
        self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

        # check gradients
        grad_smooth1 = rs.loss_gradient(x=x_test, y=y_test)
        grad_smooth2 = rs.class_gradient(x=x_test, label=None)
        grad_smooth3 = rs.class_gradient(x=x_test,
                                         label=np.argmax(y_test, axis=1))
        self.assertEqual(grad_smooth1.shape, x_test_adv.shape)
        self.assertEqual(grad_smooth2.shape[0], len(x_test))
        self.assertEqual(grad_smooth3.shape[0], len(x_test))

        # check certification
        pred, radius = rs.certify(x=x_test, n=250)
        self.assertEqual(len(pred), len(x_test))
        self.assertEqual(len(radius), len(x_test))
        self.assertTrue((radius <= 1).all())
        self.assertTrue((pred < y_test.shape[1]).all())
コード例 #9
0
    def test_keras_iris_vector_unbounded(self):

        (_, _), (x_test, y_test) = self.iris

        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = SaliencyMapMethod(classifier, theta=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Test untargeted attack
        attack = VirtualAdversarialMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with VAT adversarial examples: %.2f%%', (acc * 100))
コード例 #11
0
    def test_keras_iris_clipped_L2(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()
        attack = CarliniL2Method(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == predictions_adv).all())
        accuracy = np.sum(
            predictions_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%',
                    (accuracy * 100))
コード例 #12
0
 def test_keras_iris_clipped(self):
     (_, _), (x_test, y_test) = self.iris
     classifier = get_iris_classifier_kr()
     attack = ElasticNet(classifier, targeted=False, max_iter=10)
     x_test_adv = attack.generate(x_test)
     expected_x_test_adv = np.asarray([0.85931635, 0.44633555, 0.65658355, 0.23840423])
     np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)
     self.assertLessEqual(np.amax(x_test_adv), 1.0)
     self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
     predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
     np.testing.assert_array_equal(predictions_adv, np.asarray([1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2,
                                                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 0, 1,
                                                                1, 1, 2, 0, 2, 2, 1, 1, 2]))
     accuracy = 1.0 - np.sum(predictions_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
     logger.info('EAD success rate on Iris: %.2f%%', (accuracy * 100))
    def test_keras_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
        attack = VirtualAdversarialMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with VAT adversarial examples: %.2f%%', (acc * 100))
    def test_keras_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
        attack_params = {"max_iter": 1, "attacker": "newtonfool", "attacker_params": {"max_iter": 5}}
        attack = UniversalPerturbation(classifier)
        attack.set_params(**attack_params)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))
コード例 #15
0
    def test_keras_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
        attack = ElasticNet(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(x_test)
        expected_x_test_adv = np.asarray([0.85931635, 0.44633555, 0.65658355, 0.23840423])
        np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)
        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        np.testing.assert_array_equal(predictions_adv, np.asarray([1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2,
                                                                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 0, 1,
                                                                   1, 1, 2, 0, 2, 2, 1, 1, 2]))
        accuracy = 1.0 - np.sum(predictions_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('EAD success rate on Iris: %.2f%%', (accuracy * 100))
コード例 #16
0
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        attack = NewtonFool(classifier, max_iter=5)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with NewtonFool adversarial examples: %.2f%%',
            (acc * 100))
コード例 #17
0
    def test_failure_feature_vectors(self):
        attack_params = {
            "max_translation": 10.0,
            "num_translations": 3,
            "max_rotation": 30.0,
            "num_rotations": 3
        }
        classifier = get_iris_classifier_kr()
        attack = SpatialTransformation(classifier=classifier)
        attack.set_params(**attack_params)
        data = np.random.rand(10, 4)

        # Assert that value error is raised for feature vectors
        with self.assertRaises(ValueError) as context:
            attack.generate(data)

        self.assertIn('Feature vectors detected.', str(context.exception))
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Test untargeted attack
        attack_params = {"max_iter": 1, "attacker": "newtonfool", "attacker_params": {"max_iter": 5}}
        attack = UniversalPerturbation(classifier)
        attack.set_params(**attack_params)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))
コード例 #19
0
    def test_keras_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = NewtonFool(classifier, max_iter=5, batch_size=128)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with NewtonFool adversarial examples: %.2f%%',
            (acc * 100))
コード例 #20
0
    def test_keras_iris_vector_clipped(self):

        (_, _), (x_test, y_test) = self.iris

        classifier = get_iris_classifier_kr()

        attack = SaliencyMapMethod(classifier, theta=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        accuracy = np.sum(
            preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%',
                    (accuracy * 100))
コード例 #21
0
    def test_keras_iris_unbounded_L2(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = CarliniL2Method(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == predictions_adv).all())
        accuracy = np.sum(
            predictions_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%',
                    (accuracy * 100))
コード例 #22
0
    def test_failure_feature_vectors(self):
        attack_params = {
            "rotation_max": 22.5,
            "scale_min": 0.1,
            "scale_max": 1.0,
            "learning_rate": 5.0,
            "number_of_steps": 5,
            "batch_size": 10
        }
        classifier = get_iris_classifier_kr()
        attack = AdversarialPatch(classifier=classifier)
        attack.set_params(**attack_params)
        data = np.random.rand(10, 4)

        # Assert that value error is raised for feature vectors
        with self.assertRaises(ValueError) as context:
            attack.generate(data)

        self.assertIn('Feature vectors detected.', str(context.exception))
コード例 #23
0
    def test_iris_kr(self):
        """
        Second test for Keras.
        :return:
        """
        # Build KerasClassifier
        victim_krc = get_iris_classifier_kr()

        # Create simple CNN
        model = Sequential()
        model.add(Dense(10, input_shape=(4, ), activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(3, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=keras.optimizers.Adam(lr=0.001),
                      metrics=['accuracy'])

        # Get classifier
        thieved_krc = KerasClassifier(model,
                                      clip_values=(0, 1),
                                      use_logits=False,
                                      channel_index=1)

        # Create attack
        copycat_cnn = CopycatCNN(classifier=victim_krc,
                                 batch_size_fit=BATCH_SIZE,
                                 batch_size_query=BATCH_SIZE,
                                 nb_epochs=NB_EPOCHS,
                                 nb_stolen=NB_STOLEN)
        thieved_krc = copycat_cnn.extract(x=self.x_train,
                                          thieved_classifier=thieved_krc)

        victim_preds = np.argmax(victim_krc.predict(x=self.x_train[:100]),
                                 axis=1)
        thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train[:100]),
                                  axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)

        # Clean-up
        k.clear_session()
コード例 #24
0
    def test_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris

        classifier = get_iris_classifier_kr()
        classifier = QueryEfficientBBGradientEstimation(classifier,
                                                        20,
                                                        1 / 64.,
                                                        round_samples=1 / 255.)

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%',
                    (acc * 100))
コード例 #25
0
    def test_iris_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        classifier = QueryEfficientBBGradientEstimation(classifier,
                                                        20,
                                                        1 / 64.,
                                                        round_samples=1 / 255.)
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%',
                    (acc * 100))