def test_krclassifier(self):
        """
        Exercise the Carlini L2 attack on a small Keras CNN fitted on an MNIST subset.

        Three runs are checked: targeted with random targets, untargeted with `y` supplied, and untargeted
        without `y`.
        :return:
        """
        # Attach a new TensorFlow session to the Keras backend
        tf_session = tf.Session()
        k.set_session(tf_session)

        # Load MNIST and keep only a small slice of each split
        batch_size, nb_train, nb_test = 100, 1000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train = x_train[:nb_train]
        y_train = y_train[:nb_train]
        x_test = x_test[:nb_test]
        y_test = y_test[:nb_test]

        # Small CNN: conv -> max-pool -> flatten -> softmax
        cnn = Sequential()
        conv = Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1))
        cnn.add(conv)
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Flatten())
        cnn.add(Dense(10, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.01)
        cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer=adam, metrics=['accuracy'])

        # Wrap the model and train it
        krc = KerasClassifier((0, 1), cnn, use_logits=False)
        krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

        # Settings shared by all three attack instances
        cw_settings = {'max_iter': 100, 'binary_search_steps': 10, 'learning_rate': 2e-2, 'initial_const': 3,
                       'decay': 1e-2}

        # Run 1: targeted, random target classes; at least one sample must land on its target
        attack = CarliniL2Method(classifier=krc, targeted=True, **cw_settings)
        targets = random_targets(y_test, krc.nb_classes)
        x_adv = attack.generate(x_test, y=targets)
        self.assertFalse((x_test == x_adv).all())
        target_idx = np.argmax(targets, axis=1)
        adv_idx = np.argmax(krc.predict(x_adv), axis=1)
        self.assertTrue((target_idx == adv_idx).any())

        # Run 2: untargeted with `y` supplied; no prediction may equal the supplied class
        attack = CarliniL2Method(classifier=krc, targeted=False, **cw_settings)
        targets = random_targets(y_test, krc.nb_classes)
        x_adv = attack.generate(x_test, y=targets)
        self.assertFalse((x_test == x_adv).all())
        target_idx = np.argmax(targets, axis=1)
        adv_idx = np.argmax(krc.predict(x_adv), axis=1)
        self.assertTrue((target_idx != adv_idx).all())

        # Run 3: untargeted without `y`; at least one prediction must change
        attack = CarliniL2Method(classifier=krc, targeted=False, **cw_settings)
        x_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_adv).all())
        clean_idx = np.argmax(krc.predict(x_test), axis=1)
        adv_idx = np.argmax(krc.predict(x_adv), axis=1)
        self.assertTrue((clean_idx != adv_idx).any())
    def _create_krclassifier():
        """
        Build an untrained KerasClassifier around a small compiled CNN for testing.

        :return: the KerasClassifier wrapping the compiled model
        """
        # Attach a new TensorFlow session to the Keras backend
        tf_session = tf.Session()
        k.set_session(tf_session)

        # Small CNN: conv -> max-pool -> flatten -> softmax
        cnn = Sequential()
        conv = Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1))
        cnn.add(conv)
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Flatten())
        cnn.add(Dense(10, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.01)
        cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer=adam, metrics=['accuracy'])

        # Wrap the model; no training happens here
        return KerasClassifier((0, 1), cnn, use_logits=False)
    def test_with_defences(self):
        """
        Run the fast gradient method against a classifier built with the 'featsqueeze1' defence.

        Checks that the attack changes the inputs and that not every prediction matches the labels, then
        prints the accuracy on the adversarial train and test samples.
        :return:
        """
        # Take the MNIST subset from the fixture
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = x_train[:NB_TRAIN]
        y_train = y_train[:NB_TRAIN]
        x_test = x_test[:NB_TEST]
        y_test = y_test[:NB_TEST]

        # Reuse the Keras model held by self.classifier_k and wrap it with the defence enabled
        keras_model = self.classifier_k._model
        defended = KerasClassifier((0, 1), keras_model, defences='featsqueeze1')

        fgm = FastGradientMethod(defended, eps=1)
        x_train_adv = fgm.generate(x_train)
        x_test_adv = fgm.generate(x_test)

        # The attack must have modified both sets
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_labels_adv = get_labels_np_array(defended.predict(x_train_adv))
        test_labels_adv = get_labels_np_array(defended.predict(x_test_adv))

        # Not every adversarial prediction may agree with the true label
        self.assertFalse((y_train == train_labels_adv).all())
        self.assertFalse((y_test == test_labels_adv).all())

        train_probs = defended.predict(x_train_adv)
        train_hits = np.sum(np.argmax(train_probs, axis=1) == np.argmax(y_train, axis=1))
        acc = train_hits / y_train.shape[0]
        print('\nAccuracy on adversarial train examples with feature squeezing: %.2f%%' % (acc * 100))

        test_probs = defended.predict(x_test_adv)
        test_hits = np.sum(np.argmax(test_probs, axis=1) == np.argmax(y_test, axis=1))
        acc = test_hits / y_test.shape[0]
        print('\naccuracy on adversarial test examples: %.2f%%' % (acc * 100))
Exemple #4
0
    def _cnn_mnist_k(input_shape):
        """
        Build an untrained KerasClassifier around a small compiled CNN.

        :param input_shape: shape passed as `input_shape` to the first (Conv2D) layer
        :return: the KerasClassifier wrapping the compiled model
        """
        # Small CNN: conv -> max-pool -> flatten -> softmax
        cnn = Sequential()
        conv = Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=input_shape)
        cnn.add(conv)
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Flatten())
        cnn.add(Dense(10, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.01)
        cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer=adam, metrics=['accuracy'])

        return KerasClassifier((0, 1), cnn, use_logits=False)
    def test_binary_activation_detector(self):
        """
        Test the binary activation detector end-to-end.

        Adversarial samples are crafted with the fast gradient method against the classifier returned by
        `get_classifier_kr()`. A small Keras model is then trained, through `BinaryActivationDetector`, on the
        classifier's layer-0 activations to separate clean inputs (label [1, 0]) from adversarial inputs
        (label [0, 1]).
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier, _ = get_classifier_kr()

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_adv = attacker.generate(x_train[:NB_TRAIN])
        # The test split is bounded by NB_TEST, not NB_TRAIN; using NB_TRAIN here would drop test samples
        # whenever NB_TRAIN < NB_TEST.
        x_test_adv = attacker.generate(x_test[:NB_TEST])

        # Compile training data for detector:
        x_train_detector = np.concatenate((x_train[:NB_TRAIN], x_train_adv), axis=0)
        y_train_detector = np.concatenate((np.array([[1, 0]] * NB_TRAIN), np.array([[0, 1]] * NB_TRAIN)), axis=0)

        # Create a simple CNN for the detector; its input shape is the per-sample shape of the layer-0 activations
        activation_shape = classifier.get_activations(x_test[:1], 0).shape[1:]
        number_outputs = 2
        model = Sequential()
        model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
        model.add(Flatten())
        model.add(Dense(number_outputs, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it.
        # Detector consider activations at layer=0:
        detector = BinaryActivationDetector(classifier=classifier,
                                            detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False),
                                            layer=0)
        detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        logger.debug('Number of true positives detected: %i', nb_true_positives)
        logger.debug('Number of true negatives detected: %i', nb_true_negatives)
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
Exemple #6
0
    def test_krclassifier(self):
        """
        Exercise UniversalPerturbation (with the "newtonfool" attacker) on a small Keras CNN.

        The classifier is fitted on a ten-sample MNIST slice, then the perturbation computed on the training
        samples is also added to the test samples and the predictions are checked.
        :return:
        """
        # Attach a new TensorFlow session to the Keras backend
        tf_session = tf.Session()
        k.set_session(tf_session)

        # Load MNIST and keep only a small slice of each split
        batch_size, nb_train, nb_test = 10, 10, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train = x_train[:nb_train]
        y_train = y_train[:nb_train]
        x_test = x_test[:nb_test]
        y_test = y_test[:nb_test]

        # Small CNN: conv -> max-pool -> flatten -> softmax
        cnn = Sequential()
        conv = Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1))
        cnn.add(conv)
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Flatten())
        cnn.add(Dense(10, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.01)
        cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer=adam, metrics=['accuracy'])

        # Wrap the model and train it
        krc = KerasClassifier((0, 1), cnn, use_logits=False)
        krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

        # Attack
        # TODO Launch with all possible attacks
        up = UniversalPerturbation(krc)
        x_train_adv = up.generate(x_train, attacker="newtonfool", attacker_params={"max_iter": 20})

        # Either the fooling rate reached 0.2 or the attack reports that it did not converge
        rate_reached = up.fooling_rate >= 0.2
        self.assertTrue(rate_reached or not up.converged)

        # Apply the perturbation stored on the attack (`up.v`) to the test samples
        x_test_adv = x_test + up.v
        self.assertFalse((x_test == x_test_adv).all())

        train_idx_adv = np.argmax(krc.predict(x_train_adv), axis=1)
        test_idx_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_idx_adv).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_idx_adv).all())
    def test_krclassifier(self):
        """
        Exercise the NewtonFool attack on a small Keras CNN fitted on an MNIST subset.

        Checks that the inputs change and that, for each sample, the probability of the originally predicted
        class does not increase on the adversarial input.
        :return:
        """
        # Load MNIST and keep only a small slice of each split
        batch_size, nb_train, nb_test = 100, 1000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train = x_train[:nb_train]
        y_train = y_train[:nb_train]
        x_test = x_test[:nb_test]
        y_test = y_test[:nb_test]

        # Small CNN: conv -> max-pool -> flatten -> softmax
        cnn = Sequential()
        conv = Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1))
        cnn.add(conv)
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Flatten())
        cnn.add(Dense(10, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.01)
        cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer=adam, metrics=['accuracy'])

        # Wrap the model and train it
        krc = KerasClassifier((0, 1), cnn, use_logits=False)
        krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

        # Attack
        newton_fool = NewtonFool(krc)
        newton_fool.set_params(max_iter=5)
        x_test_adv = newton_fool.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        probs_clean = krc.predict(x_test)
        probs_adv = krc.predict(x_test_adv)
        # Boolean mask marking, per row, the entries equal to that row's maximum clean probability
        top_mask = probs_clean.max(axis=1, keepdims=1) == probs_clean
        top_clean = probs_clean.max(axis=1)
        # Adversarial probabilities at the masked positions
        top_adv = probs_adv[top_mask]
        self.assertTrue((top_clean >= top_adv).all())
Exemple #8
0
    def test_binary_input_detector(self):
        """
        Test the binary input detector end-to-end.

        A CNN classifier is trained, adversarial samples are crafted from it with the fast gradient method, and
        a second CNN wrapped in `BinaryInputDetector` is trained to separate clean inputs (label [1, 0]) from
        adversarial inputs (label [0, 1]).
        :return:
        """
        # Initialize a tf session
        session = tf.Session()
        k.set_session(session)

        # Get MNIST
        nb_train, nb_test = 1000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        input_shape = x_train.shape[1:]
        nb_classes = 10

        # Create simple CNN
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(nb_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create classifier and train it:
        classifier = KerasClassifier((0, 1), model, use_logits=False)
        classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_clean = x_train[:nb_train]
        x_train_adv = attacker.generate(x_train_clean)
        x_test_adv = attacker.generate(x_test[:nb_test])

        # Compile training data for detector.
        # The label counts follow the actual array lengths: x_train was already truncated to NB_TRAIN above, so
        # fewer than nb_train samples may be available and a fixed count would not match the data.
        x_train_detector = np.concatenate((x_train_clean, x_train_adv),
                                          axis=0)
        y_train_detector = np.concatenate(
            (np.array([[1, 0]] * len(x_train_clean)),
             np.array([[0, 1]] * len(x_train_adv))),
            axis=0)

        # Create a simple CNN for the detector.
        # Note: we use the same architecture as for the classifier, except for the number of outputs (=2)
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(2, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it:
        detector = BinaryInputDetector(
            KerasClassifier((0, 1), model, use_logits=False))
        detector.fit(x_train_detector,
                     y_train_detector,
                     nb_epochs=2,
                     batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector(x_test), axis=1)
        test_adv_detection = np.argmax(detector(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative:
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
Exemple #9
0
def main(argv):
    """
    Craft FGSM adversarial samples for a stored Keras model and write out metrics and samples.

    Recognised flags, each followed by its value: ``--data``, ``--networkdefinition`` and ``--weights``
    (file names joined onto the ``DATA_DIR`` environment variable) and ``--epsilon`` (a float passed as
    ``eps`` to ``FastGradientMethod``). The three file names are stored in module-level globals.

    The metrics returned by ``get_metrics`` are written as JSON to ``report.txt`` and the original and
    adversarial samples are saved with ``np.savez`` as ``adv_samples``, both under ``RESULT_DIR``.

    :param argv: argument vector; parsing starts at index 1 (index 0 is presumably the program name).
    :return: None. Exits via ``sys.exit`` when too few arguments are given or ``--epsilon`` is missing.
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    epsilon = None

    # Walk the flag/value pairs. Bounding the range by len(argv) avoids indexing past the end of argv when
    # fewer than four pairs are supplied; a trailing flag without a value is ignored.
    for i in range(1, len(argv) - 1, 2):
        arg = str(argv[i])
        print(arg)
        if arg == "--data":
            dataset_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        if arg == "--networkdefinition":
            network_definition_filename = os.path.join(os.environ["DATA_DIR"],
                                                       str(argv[i + 1]))
        if arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        if arg == "--epsilon":
            epsilon = float(argv[i + 1])

    # Fail fast with a clear message instead of hitting an unbound local after the model has been loaded
    if epsilon is None:
        sys.exit("Missing required argument: --epsilon")

    print("dataset : ", dataset_filename)
    print("network definition : ", network_definition_filename)
    print("weights : ", weights_filename)

    # load & compile model
    with open(network_definition_filename, 'r') as json_file:
        model_json = json_file.read()
    model = model_from_json(model_json)
    model.load_weights(weights_filename)
    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    model.compile(**comp_params)

    # create keras classifier
    classifier = KerasClassifier((0, 1), model)

    # load data set; the context manager closes the archive once both arrays have been read
    with np.load(dataset_filename) as pf:
        x = pf['x_test']
        y = pf['y_test']

    # pre-process numpy array: add an axis at position 3 and scale by 1/255
    x = np.expand_dims(x, axis=3)
    x = x.astype('float32') / 255

    # one-hot encode the labels over 10 classes
    y = np_utils.to_categorical(y, 10)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics = get_metrics(model, x, x_samples, y)

    print("metrics : ", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")

    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], 'adv_samples')
    print("adversarial samples saved to : ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
    def test_krclassifier(self):
        """
        Exercise the Carlini L2 attack on a small Keras CNN fitted on the MNIST fixture.

        Three runs are checked: targeted with random targets, untargeted with `y` supplied, and untargeted
        without `y`. Each run also checks that the adversarial samples stay within [-0.0001, 1.0001] and prints
        the success rate.
        :return:
        """
        # Attach a new TensorFlow session to the Keras backend
        tf_session = tf.Session()
        k.set_session(tf_session)

        # Take MNIST from the fixture
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Small CNN: conv -> max-pool -> flatten -> softmax
        cnn = Sequential()
        conv = Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1))
        cnn.add(conv)
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Flatten())
        cnn.add(Dense(10, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.01)
        cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer=adam, metrics=['accuracy'])

        # Wrap the model and train it
        krc = KerasClassifier((0, 1), cnn, use_logits=False)
        krc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

        # Settings shared by all three attack instances
        cw_settings = {'max_iter': 100, 'binary_search_steps': 1, 'learning_rate': 1, 'initial_const': 10,
                       'decay': 0}

        # Run 1: targeted, random target classes; at least one sample must land on its target
        attack = CarliniL2Method(classifier=krc, targeted=True, **cw_settings)
        targets = random_targets(y_test, krc.nb_classes)
        x_adv = attack.generate(x_test, y=targets)
        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv <= 1.0001).all())
        self.assertTrue((x_adv >= -0.0001).all())
        target_idx = np.argmax(targets, axis=1)
        adv_idx = np.argmax(krc.predict(x_adv), axis=1)
        print("CW2 Target: %s" % target_idx)
        print("CW2 Actual: %s" % adv_idx)
        success_rate = sum(target_idx == adv_idx) / float(len(target_idx))
        print("CW2 Success Rate: %f" % success_rate)
        self.assertTrue((target_idx == adv_idx).any())

        # Run 2: untargeted with `y` supplied; at least one prediction must differ from the supplied class
        attack = CarliniL2Method(classifier=krc, targeted=False, **cw_settings)
        targets = random_targets(y_test, krc.nb_classes)
        x_adv = attack.generate(x_test, y=targets)
        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv <= 1.0001).all())
        self.assertTrue((x_adv >= -0.0001).all())
        target_idx = np.argmax(targets, axis=1)
        adv_idx = np.argmax(krc.predict(x_adv), axis=1)
        print("CW2 Target: %s" % target_idx)
        print("CW2 Actual: %s" % adv_idx)
        success_rate = sum(target_idx != adv_idx) / float(len(target_idx))
        print("CW2 Success Rate: %f" % success_rate)
        self.assertTrue((target_idx != adv_idx).any())

        # Run 3: untargeted without `y`; at least one prediction must change
        attack = CarliniL2Method(classifier=krc, targeted=False, **cw_settings)
        x_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv <= 1.0001).all())
        self.assertTrue((x_adv >= -0.0001).all())
        clean_idx = np.argmax(krc.predict(x_test), axis=1)
        adv_idx = np.argmax(krc.predict(x_adv), axis=1)
        print("CW2 Target: %s" % clean_idx)
        print("CW2 Actual: %s" % adv_idx)
        success_rate = sum(clean_idx != adv_idx) / float(len(clean_idx))
        print("CW2 Success Rate: %f" % success_rate)
        self.assertTrue((clean_idx != adv_idx).any())