def test_with_preprocessing(self):

        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 100
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu", defences=["featsqueeze1"])
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(X_train, Y_train)
        print("\naccuracy on training set: %.2f%%" % (scores[1] * 100))
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        attack_params = {"verbose": 0,
                         "clip_min": 0.,
                         "clip_max": 1.,
                         "eps": 1.}

        attack = FastGradientMethod(classifier, session)
        X_train_adv = attack.generate(X_train, **attack_params)
        X_test_adv = attack.generate(X_test, **attack_params)

        self.assertFalse((X_train == X_train_adv).all())
        self.assertFalse((X_test == X_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(X_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(X_test_adv))

        self.assertFalse((Y_train == train_y_pred).all())
        self.assertFalse((Y_test == test_y_pred).all())

        scores = classifier.evaluate(X_train_adv, Y_train)
        print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))

        scores = classifier.evaluate(X_test_adv, Y_test)
        print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
    def test_mnist_untargeted(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # Get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 10
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # Get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        # Perform attack
        df = SaliencyMapMethod(classifier, sess=session)
        df.set_params(clip_min=0, clip_max=1, theta=1)
        x_test_adv = df.generate(X_test)
        self.assertFalse((X_test == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((Y_test == y_pred).all())

        scores = classifier.evaluate(x_test_adv, Y_test)
        print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
コード例 #3
0
    def test_mnist(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {
            "loss": 'categorical_crossentropy',
            "optimizer": 'adam',
            "metrics": ['accuracy']
        }

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 11
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train,
                       Y_train,
                       epochs=1,
                       batch_size=batch_size,
                       verbose=0)

        # Attack
        nf = NewtonFool(classifier, sess=session)
        nf.set_params(max_iter=20)
        x_test_adv = nf.generate(X_test)
        self.assertFalse((X_test == x_test_adv).all())

        y_pred = classifier.predict(X_test)
        y_pred_adv = classifier.predict(x_test_adv)
        y_pred_bool = y_pred.max(axis=1, keepdims=1) == y_pred
        y_pred_max = y_pred.max(axis=1)
        y_pred_adv_max = y_pred_adv[y_pred_bool]
        self.assertTrue((y_pred_max >= y_pred_adv_max).all())

        scores1 = classifier.evaluate(X_test, Y_test)
        print("\nAccuracy on test set: %.2f%%" % (scores1[1] * 100))
        scores2 = classifier.evaluate(x_test_adv, Y_test)
        print('\nAccuracy on adversarial examples: %.2f%%' %
              (scores2[1] * 100))
        self.assertTrue(scores1[1] != scores2[1])
コード例 #4
0
    def test_mnist(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # get MNIST
        batch_size, nb_train, nb_test = 10, 10, 10
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        attack_params = {"verbose": 2,
                         "clip_min": 0.,
                         "clip_max": 1,
                         "attacker": "deepfool"}

        attack = UniversalPerturbation(classifier, session)
        x_train_adv = attack.generate(X_train, **attack_params)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        x_test_adv = X_test + attack.v
        self.assertFalse((X_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((Y_test == test_y_pred).all())
        self.assertFalse((Y_train == train_y_pred).all())

        scores = classifier.evaluate(x_train_adv, Y_train)
        print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))

        scores = classifier.evaluate(x_test_adv, Y_test)
        print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
コード例 #5
0
    def test_mnist(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {
            "loss": 'categorical_crossentropy',
            "optimizer": 'adam',
            "metrics": ['accuracy']
        }

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 10
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train,
                       Y_train,
                       epochs=1,
                       batch_size=batch_size,
                       verbose=0)
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        df = CarliniL2Method(classifier,
                             sess=session,
                             targeted=False,
                             max_iterations=100,
                             binary_search_steps=2,
                             learning_rate=1e-2,
                             initial_const=1)
        params = {
            'y_val':
            random_targets(Y_test,
                           classifier.model.get_output_shape_at(-1)[-1])
        }
        x_test_adv = df.generate(X_test, **params)
        self.assertFalse((X_test == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((Y_test == y_pred).all())

        scores = classifier.evaluate(x_test_adv, Y_test)
        print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
    def test_mnist_targeted(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # Get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:nb_train], y_train[:nb_train]
        x_test, y_test = x_test[:nb_test], y_test[:nb_test]
        im_shape = x_train[0].shape

        # Get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(x_test, y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        # Generate random target classes
        import numpy as np
        nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
        targets = np.random.randint(nb_classes, size=nb_test)
        while (targets == np.argmax(y_test, axis=1)).any():
            targets = np.random.randint(nb_classes, size=nb_test)

        # Perform attack
        df = SaliencyMapMethod(classifier, sess=session, clip_min=0, clip_max=1, theta=1)
        x_test_adv = df.generate(x_test, y_val=targets)
        self.assertFalse((x_test == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == y_pred).all())

        scores = classifier.evaluate(x_test_adv, y_test)
        print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))