Ejemplo n.º 1
0
    def test_without_defences(self):
        """Run FGM against the query-limited wrapper of the undefended Keras model.

        Verifies that the attack changes both splits held in ``self.mnist`` and
        that the wrapped classifier no longer reproduces every true label, then
        logs the accuracy obtained on the adversarial train and test samples.
        """
        (train_x, train_y), (test_x, test_y) = self.mnist

        # Wrap the ready-trained Keras model in the query-efficient gradient estimator
        estimator = QueryEfficientBBGradientEstimation(self.classifier_k, 20, 1 / 64., round_samples=1 / 255.)

        fgm = FastGradientMethod(estimator, eps=1)
        train_adv = fgm.generate(train_x)
        test_adv = fgm.generate(test_x)

        # At least one value must differ between the clean and adversarial inputs
        train_untouched = (train_x == train_adv).all()
        self.assertFalse(train_untouched)
        test_untouched = (test_x == test_adv).all()
        self.assertFalse(test_untouched)

        train_labels_adv = get_labels_np_array(estimator.predict(train_adv))
        test_labels_adv = get_labels_np_array(estimator.predict(test_adv))

        # Predictions on adversarial inputs must not agree with the true labels everywhere
        self.assertFalse((train_y == train_labels_adv).all())
        self.assertFalse((test_y == test_labels_adv).all())

        train_hits = np.argmax(estimator.predict(train_adv), axis=1) == np.argmax(train_y, axis=1)
        train_acc = np.sum(train_hits) / train_y.shape[0]
        logger.info('Accuracy on adversarial train examples with limited query info: %.2f%%', train_acc * 100)

        test_hits = np.argmax(estimator.predict(test_adv), axis=1) == np.argmax(test_y, axis=1)
        test_acc = np.sum(test_hits) / test_y.shape[0]
        logger.info('Accuracy on adversarial test examples with limited query info: %.2f%%', test_acc * 100)
Ejemplo n.º 2
0
    def test_with_defences(self):
        """Run FGM against a query-limited classifier protected by feature squeezing.

        The ready-trained Keras model is re-wrapped with a 1-bit
        ``FeatureSqueezing`` defence and then with the query-efficient gradient
        estimator. The test checks that the attack changes the inputs and that
        not every prediction on them matches the true labels, then logs the
        adversarial accuracy on both splits.
        """
        (train_x, train_y), (test_x, test_y) = self.mnist

        # Build a defended classifier around the ready-trained Keras model
        keras_model = self.classifier_k._model
        squeezer = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
        defended = KerasClassifier(model=keras_model, clip_values=(0, 1), defences=squeezer)
        # Restrict gradient access through the query-efficient wrapper
        estimator = QueryEfficientBBGradientEstimation(defended, 20, 1 / 64., round_samples=1 / 255.)

        fgm = FastGradientMethod(estimator, eps=1)
        train_adv = fgm.generate(train_x)
        test_adv = fgm.generate(test_x)

        # The attack has to alter at least one value in each split
        train_untouched = (train_x == train_adv).all()
        self.assertFalse(train_untouched)
        test_untouched = (test_x == test_adv).all()
        self.assertFalse(test_untouched)

        train_labels_adv = get_labels_np_array(estimator.predict(train_adv))
        test_labels_adv = get_labels_np_array(estimator.predict(test_adv))

        self.assertFalse((train_y == train_labels_adv).all())
        self.assertFalse((test_y == test_labels_adv).all())

        train_hits = np.argmax(estimator.predict(train_adv), axis=1) == np.argmax(train_y, axis=1)
        train_acc = np.sum(train_hits) / train_y.shape[0]
        logger.info('Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%',
                    train_acc * 100)

        test_hits = np.argmax(estimator.predict(test_adv), axis=1) == np.argmax(test_y, axis=1)
        test_acc = np.sum(test_hits) / test_y.shape[0]
        logger.info('Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%',
                    test_acc * 100)
    def test_iris_pt(self):
        """Exercise untargeted and targeted FGM on the PyTorch Iris classifier.

        Both runs must change the test inputs while keeping every value inside
        [0, 1]. The untargeted run must break at least one prediction; the
        targeted run must hit at least one of the randomly drawn targets.
        """
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()
        true_labels = np.argmax(y_test, axis=1)
        nb_samples = y_test.shape[0]

        # --- Untargeted attack ---
        untargeted = FastGradientMethod(classifier, eps=.1)
        adv_untargeted = untargeted.generate(x_test)
        self.assertFalse((x_test == adv_untargeted).all())
        self.assertTrue((adv_untargeted <= 1).all())
        self.assertTrue((adv_untargeted >= 0).all())

        labels_untargeted = np.argmax(classifier.predict(adv_untargeted), axis=1)
        self.assertFalse((true_labels == labels_untargeted).all())
        accuracy = np.sum(labels_untargeted == true_labels) / nb_samples
        logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%',
                    accuracy * 100)

        # --- Targeted attack ---
        targets = random_targets(y_test, nb_classes=3)
        targeted = FastGradientMethod(classifier, targeted=True, eps=.1, batch_size=128)
        adv_targeted = targeted.generate(x_test, y=targets)
        self.assertFalse((x_test == adv_targeted).all())
        self.assertTrue((adv_targeted <= 1).all())
        self.assertTrue((adv_targeted >= 0).all())

        labels_targeted = np.argmax(classifier.predict(adv_targeted), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == labels_targeted).any())
        success_rate = np.sum(labels_targeted == target_labels) / nb_samples
        logger.info('Success rate of targeted FGM on Iris: %.2f%%',
                    success_rate * 100)
def fgsm(clf, x_train, x_test, epsilon=0.1):
    """Craft FGSM adversarial versions of the train and test inputs.

    :param clf: classifier handed to ``FastGradientMethod``.
    :param x_train: training inputs to perturb.
    :param x_test: test inputs to perturb.
    :param epsilon: value passed to the attack as ``eps`` (maximum perturbation).
        Previously this argument was silently overwritten with ``0.1``; it is
        now honoured, and the default keeps the old behaviour.
    :return: tuple ``(x_train_adv, x_test_adv)``.
    """
    from art.attacks.fast_gradient import FastGradientMethod

    fgsm_adv_crafter = FastGradientMethod(clf, eps=epsilon)
    # The test set is attacked first, then the training set (order kept from the original).
    x_test_fgsm_adv = fgsm_adv_crafter.generate(x=x_test)
    x_train_fgsm_adv = fgsm_adv_crafter.generate(x=x_train)
    return x_train_fgsm_adv, x_test_fgsm_adv
Ejemplo n.º 5
0
    def _test_with_defences(self, custom_activation=False):
        """Attack a Keras classifier built with the 'featsqueeze1' defence using FGM.

        :param custom_activation: forwarded unchanged to ``KerasClassifier``.
        """
        (train_x, train_y), (test_x, test_y) = self.mnist

        # Re-wrap the ready-trained Keras model with the feature squeezing defence
        keras_model = self.classifier_k._model
        defended = KerasClassifier((0, 1), keras_model, defences='featsqueeze1',
                                   custom_activation=custom_activation)

        fgm = FastGradientMethod(defended, eps=1)
        train_adv = fgm.generate(train_x)
        test_adv = fgm.generate(test_x)

        # The attack has to alter at least one value in each split
        train_untouched = (train_x == train_adv).all()
        self.assertFalse(train_untouched)
        test_untouched = (test_x == test_adv).all()
        self.assertFalse(test_untouched)

        train_labels_adv = get_labels_np_array(defended.predict(train_adv))
        test_labels_adv = get_labels_np_array(defended.predict(test_adv))

        self.assertFalse((train_y == train_labels_adv).all())
        self.assertFalse((test_y == test_labels_adv).all())

        train_hits = np.argmax(defended.predict(train_adv), axis=1) == np.argmax(train_y, axis=1)
        train_acc = np.sum(train_hits) / train_y.shape[0]
        logger.info('Accuracy on adversarial train examples with feature squeezing: %.2f%%', train_acc * 100)

        test_hits = np.argmax(defended.predict(test_adv), axis=1) == np.argmax(test_y, axis=1)
        test_acc = np.sum(test_hits) / test_y.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%', test_acc * 100)
    def test_subsetscan_detector(self):
        """Check the detection power returned by ``SubsetScanningDetector.scan``.

        Scanning clean data against itself must give a detection power of
        (almost) 0.5; scanning clean data against FGM samples, or against a
        clean/adversarial training mix, must give a value above 0.5.
        """
        (x_train, _), (x_test, _), _, _ = load_dataset('mnist')
        x_train = x_train[:NB_TRAIN]
        x_test = x_test[:NB_TEST]

        # Keras classifier
        classifier, _ = get_classifier_kr()

        # Adversarial counterparts of both splits
        fgm = FastGradientMethod(classifier, eps=0.5)
        x_train_adv = fgm.generate(x_train)
        x_test_adv = fgm.generate(x_test)

        # Clean training samples followed by their adversarial versions
        mixed_train = np.concatenate((x_train, x_train_adv), axis=0)

        # The clean training samples are handed to the detector as its background set
        detector = SubsetScanningDetector(classifier, x_train, layer=1)

        _, _, power_clean = detector.scan(x_test, x_test)
        self.assertAlmostEqual(power_clean, 0.5)

        _, _, power_adv = detector.scan(x_test, x_test_adv)
        self.assertGreater(power_adv, 0.5)

        _, _, power_mixed = detector.scan(x_test, mixed_train, 85, 15)
        self.assertGreater(power_mixed, 0.5)
    def test_with_defences(self):
        """Attack a 'featsqueeze1'-defended Keras classifier with FGM on truncated data.

        Uses the first ``NB_TRAIN`` / ``NB_TEST`` samples of ``self.mnist``,
        checks that the attack changes the inputs and the predictions, and
        prints the accuracy on the adversarial samples.
        """
        (train_x, train_y), (test_x, test_y) = self.mnist
        train_x = train_x[:NB_TRAIN]
        train_y = train_y[:NB_TRAIN]
        test_x = test_x[:NB_TEST]
        test_y = test_y[:NB_TEST]

        # Re-wrap the ready-trained Keras model with the feature squeezing defence
        keras_model = self.classifier_k._model
        defended = KerasClassifier((0, 1), keras_model, defences='featsqueeze1')

        fgm = FastGradientMethod(defended, eps=1)
        train_adv = fgm.generate(train_x)
        test_adv = fgm.generate(test_x)

        # The attack has to alter at least one value in each split
        train_untouched = (train_x == train_adv).all()
        self.assertFalse(train_untouched)
        test_untouched = (test_x == test_adv).all()
        self.assertFalse(test_untouched)

        train_labels_adv = get_labels_np_array(defended.predict(train_adv))
        test_labels_adv = get_labels_np_array(defended.predict(test_adv))

        self.assertFalse((train_y == train_labels_adv).all())
        self.assertFalse((test_y == test_labels_adv).all())

        train_hits = np.argmax(defended.predict(train_adv), axis=1) == np.argmax(train_y, axis=1)
        train_acc = np.sum(train_hits) / train_y.shape[0]
        print('\nAccuracy on adversarial train examples with feature squeezing: %.2f%%' % (train_acc * 100))

        test_hits = np.argmax(defended.predict(test_adv), axis=1) == np.argmax(test_y, axis=1)
        test_acc = np.sum(test_hits) / test_y.shape[0]
        print('\naccuracy on adversarial test examples: %.2f%%' % (test_acc * 100))
    def test_with_preprocessing(self):
        """Train a small CNN with the 'featsqueeze1' defence and attack it with FGM.

        The classifier is fitted for one epoch on 1000 training samples, its
        accuracy is printed, and FGM samples are generated for both splits. The
        test asserts that the inputs and the predicted labels change, then
        prints the accuracy on the adversarial samples.
        """
        session = tf.Session()
        k.set_session(session)

        compile_settings = {
            "loss": 'categorical_crossentropy',
            "optimizer": 'adam',
            "metrics": ['accuracy'],
        }

        # get MNIST
        batch_size = 100
        nb_train = 1000
        nb_test = 100
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train = X_train[:nb_train]
        Y_train = Y_train[:nb_train]
        X_test = X_test[:nb_test]
        Y_test = Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu", defences=["featsqueeze1"])
        classifier.compile(compile_settings)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        train_scores = classifier.evaluate(X_train, Y_train)
        print("\naccuracy on training set: %.2f%%" % (train_scores[1] * 100))
        test_scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (test_scores[1] * 100))

        attack = FastGradientMethod(classifier, session)
        # Same attack settings for both splits
        X_train_adv = attack.generate(X_train, verbose=0, clip_min=0., clip_max=1., eps=1.)
        X_test_adv = attack.generate(X_test, verbose=0, clip_min=0., clip_max=1., eps=1.)

        # The attack has to alter at least one value in each split
        train_untouched = (X_train == X_train_adv).all()
        self.assertFalse(train_untouched)
        test_untouched = (X_test == X_test_adv).all()
        self.assertFalse(test_untouched)

        train_labels_adv = get_labels_np_array(classifier.predict(X_train_adv))
        test_labels_adv = get_labels_np_array(classifier.predict(X_test_adv))

        self.assertFalse((Y_train == train_labels_adv).all())
        self.assertFalse((Y_test == test_labels_adv).all())

        adv_train_scores = classifier.evaluate(X_train_adv, Y_train)
        print('\naccuracy on adversarial train examples: %.2f%%' % (adv_train_scores[1] * 100))

        adv_test_scores = classifier.evaluate(X_test_adv, Y_test)
        print('\naccuracy on adversarial test examples: %.2f%%' % (adv_test_scores[1] * 100))
    def test_binary_activation_detector(self):
        """
        Test the binary activation detector end-to-end.

        A Keras classifier is attacked with FGM; a small pooling + dense model is
        trained on the classifier's layer-0 activations to separate clean from
        adversarial inputs, and the test requires at least one true positive and
        one true negative on the test split.

        :return:
        """
        # Get MNIST (labels are not needed: the detector is trained on clean/adversarial flags)
        (x_train, _), (x_test, _), _, _ = load_mnist()
        x_train = x_train[:NB_TRAIN]
        x_test = x_test[:NB_TEST]

        # Keras classifier
        classifier, _ = get_classifier_kr()

        # Generate adversarial samples. The test split was previously re-sliced with
        # NB_TRAIN here; it is already truncated to NB_TEST above, so it is used as is.
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Compile training data for detector: clean samples are labelled [1, 0],
        # adversarial samples [0, 1]
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)
        y_train_detector = np.concatenate((np.array([[1, 0]] * NB_TRAIN), np.array([[0, 1]] * NB_TRAIN)), axis=0)

        # Create a simple CNN for the detector, sized from the classifier's layer-0 activations
        activation_shape = classifier.get_activations(x_test[:1], 0).shape[1:]
        number_outputs = 2
        model = Sequential()
        model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
        model.add(Flatten())
        model.add(Dense(number_outputs, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it.
        # Detector consider activations at layer=0:
        detector = BinaryActivationDetector(classifier=classifier,
                                            detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False),
                                            layer=0)
        detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        logger.debug('Number of true positives detected: %i', nb_true_positives)
        logger.debug('Number of true negatives detected: %i', nb_true_negatives)
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
def main(argv):
    """Craft FGSM adversarial samples for a stored model and write a metrics report.

    Options are read from ``argv`` as ``--flag value`` pairs:

    * ``--datax`` / ``--datay``: data files, resolved under ``$DATA_DIR``
      (loaded with ``np.load`` and ``np.loadtxt`` respectively);
    * ``--weights``: model state dict, resolved under ``$DATA_DIR``;
    * ``--epsilon``: float passed to the attack as ``eps``.

    The metrics are written as JSON to ``$RESULT_DIR/report.txt`` and the
    original/adversarial samples are stored with ``np.savez`` under
    ``$RESULT_DIR/adv_samples``.

    :param argv: full argument vector, program name at index 0.
    :raises SystemExit: if fewer than two arguments are given or a required
        option is missing.
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    dataset_filenamex = None
    dataset_filenamey = None
    epsilon = None
    weights_given = False

    # As before, at most the first four "--flag value" pairs (argv[1]..argv[8])
    # are inspected. Bounding the scan by len(argv) keeps a short or odd-length
    # argument list from raising IndexError.
    for i in range(1, min(len(argv) - 1, 8), 2):
        arg = str(argv[i])
        print(arg)
        value = argv[i + 1]
        if arg == "--datax":
            dataset_filenamex = os.path.join(os.environ["DATA_DIR"], str(value))
        elif arg == "--datay":
            dataset_filenamey = os.path.join(os.environ["DATA_DIR"], str(value))
        elif arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"], str(value))
            weights_given = True
        elif arg == "--epsilon":
            epsilon = float(value)

    # Fail with a clear message instead of an UnboundLocalError/NameError later on.
    missing = []
    if dataset_filenamex is None:
        missing.append("--datax")
    if dataset_filenamey is None:
        missing.append("--datay")
    # A module-level weights_filename set elsewhere still counts as provided.
    if not weights_given and globals().get("weights_filename") is None:
        missing.append("--weights")
    if epsilon is None:
        missing.append("--epsilon")
    if missing:
        sys.exit("Missing required arguments: " + ", ".join(missing))

    print("dataset_x:", dataset_filenamex)
    print("dataset_y:", dataset_filenamey)
    print("weights:", weights_filename)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = ThreeLayerCNN().to(device)
    # map_location lets weights saved on a GPU be loaded on a CPU-only machine
    model.load_state_dict(torch.load(weights_filename, map_location=device))
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # create pytorch classifier
    classifier = PyTorchClassifier((0, 1), model, loss_fn, optimizer,
                                   (1, 3, 64, 64), 2)

    # load data set
    x = np.load(dataset_filenamex)
    y = np.loadtxt(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence);
    # the per-sample predictions returned alongside are not used here
    metrics, _, _ = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")

    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], "adv_samples")
    print("adversarial samples saved to: ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
    def _test_with_defences(self, custom_activation=False):
        """Attack a Keras classifier defended by 1-bit feature squeezing with FGM.

        :param custom_activation: forwarded unchanged to ``KerasClassifier``.
        """
        from art.defences import FeatureSqueezing

        (train_x, train_y), (test_x, test_y) = self.mnist

        # Re-wrap the ready-trained Keras model together with the defence
        keras_model = self.classifier_k._model
        squeezer = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
        defended = KerasClassifier(model=keras_model, clip_values=(0, 1), defences=squeezer,
                                   custom_activation=custom_activation)

        fgm = FastGradientMethod(defended, eps=1, batch_size=128)
        train_adv = fgm.generate(train_x)
        test_adv = fgm.generate(test_x)

        # The attack has to alter at least one value in each split
        train_untouched = (train_x == train_adv).all()
        self.assertFalse(train_untouched)
        test_untouched = (test_x == test_adv).all()
        self.assertFalse(test_untouched)

        train_labels_adv = get_labels_np_array(defended.predict(train_adv))
        test_labels_adv = get_labels_np_array(defended.predict(test_adv))

        self.assertFalse((train_y == train_labels_adv).all())
        self.assertFalse((test_y == test_labels_adv).all())

        train_hits = np.argmax(defended.predict(train_adv), axis=1) == np.argmax(train_y, axis=1)
        train_acc = np.sum(train_hits) / train_y.shape[0]
        logger.info('Accuracy on MNIST with FGM adversarial train examples with feature squeezing: %.2f%%',
                    train_acc * 100)

        test_hits = np.argmax(defended.predict(test_adv), axis=1) == np.argmax(test_y, axis=1)
        test_acc = np.sum(test_hits) / test_y.shape[0]
        logger.info('Accuracy on MNIST with FGM adversarial test examples: %.2f%%',
                    test_acc * 100)
Ejemplo n.º 12
0
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test case checks if accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        session = tf.Session()
        k.set_session(session)

        # Load MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
        im_shape = x_train[0].shape

        # Create and fit target classifier
        comp_params = {
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy']
        }
        params = {'epochs': 5, 'batch_size': BATCH_SIZE}
        classifier_tgt = CNN(im_shape, dataset='mnist')
        classifier_tgt.compile(comp_params)
        classifier_tgt.fit(x_train, y_train, **params)

        # Create and fit source classifier. The original fitted the target a second
        # time here, leaving the source model (the one being attacked) untrained.
        classifier_src = CNN(im_shape, dataset='mnist')
        classifier_src.compile(comp_params)
        classifier_src.fit(x_train, y_train, **params)

        # Create FGSM and DeepFool attackers, both crafting on the source classifier
        adv1 = FastGradientMethod(classifier_src, session)
        adv2 = DeepFool(classifier_src, session)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        # Each attack produces one sample per test input, so the labels are stacked twice
        y_adv = np.vstack((y_test, y_test))
        print(y_adv.shape)
        acc = classifier_tgt.evaluate(x_adv, y_adv)

        # Perform adversarial training
        adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
        adv_trainer.fit(x_train, y_train, **params)

        # Evaluate that accuracy on adversarial sample has improved
        # NOTE(review): if evaluate() returns a [loss, accuracy] pair this comparison
        # is lexicographic and dominated by the loss -- confirm the return type.
        acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
        self.assertTrue(acc_adv_trained >= acc)
Ejemplo n.º 13
0
    def test_iris_unbounded(self):
        """Attack a query-limited Iris classifier that has no clip values.

        With ``eps=1`` and no clipping, the adversarial samples must contain
        values above 1 and below 0, and at least one prediction must be wrong.
        """
        (_, _), (x_test, y_test) = self.iris
        bounded, _ = get_iris_classifier_kr()

        # Rebuild the classifier without clip values, then limit its query access
        unbounded = KerasClassifier(model=bounded._model, use_logits=False, channel_index=1)
        estimator = QueryEfficientBBGradientEstimation(unbounded, 20, 1 / 64., round_samples=1 / 255.)
        fgm = FastGradientMethod(estimator, eps=1)
        adv_samples = fgm.generate(x_test)
        self.assertFalse((x_test == adv_samples).all())
        self.assertTrue((adv_samples > 1).any())
        self.assertTrue((adv_samples < 0).any())

        adv_labels = np.argmax(estimator.predict(adv_samples), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%', accuracy * 100)
Ejemplo n.º 14
0
    def test_iris_clipped(self):
        """Attack the query-limited Keras Iris classifier with untargeted FGM.

        The adversarial samples must differ from the inputs, stay inside
        [0, 1], and cause at least one wrong prediction.
        """
        (_, _), (x_test, y_test) = self.iris

        base_classifier, _ = get_iris_classifier_kr()
        estimator = QueryEfficientBBGradientEstimation(base_classifier, 20, 1 / 64., round_samples=1 / 255.)

        # Untargeted attack
        fgm = FastGradientMethod(estimator, eps=.1)
        adv_samples = fgm.generate(x_test)
        self.assertFalse((x_test == adv_samples).all())
        self.assertTrue((adv_samples <= 1).all())
        self.assertTrue((adv_samples >= 0).all())

        adv_labels = np.argmax(estimator.predict(adv_samples), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%', accuracy * 100)
Ejemplo n.º 15
0
    def _test_mnist_targeted(self, classifier):
        """Run a targeted, minimal-perturbation FGM attack towards each sample's runner-up class.

        The target of every test sample is the class with the second-highest
        prediction. The attack must change the inputs, and the number of
        matching entries between target and predicted label arrays must be at
        least half the number of test samples.

        :param classifier: classifier to attack; must provide ``predict``.
        """
        (_, _), (x_test, y_test) = self.mnist
        nb_samples = x_test.shape[0]

        # Test FGSM with np.inf norm
        attack = FastGradientMethod(classifier, eps=1.0, targeted=True)

        # One-hot targets: column index of the second-largest prediction per row
        ranking = classifier.predict(x_test).argsort(axis=1)
        y_test_adv = np.zeros((nb_samples, 10))
        y_test_adv[np.arange(nb_samples), ranking[:, -2]] = 1.0

        x_test_adv = attack.generate(x_test, minimal=True, eps_step=0.01, eps=1.0, y=y_test_adv)
        self.assertFalse((x_test == x_test_adv).all())

        predicted = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertEqual(y_test_adv.shape, predicted.shape)
        nb_matching = (y_test_adv == predicted).sum()
        self.assertTrue(nb_matching >= nb_samples // 2)
    def test_iris_k_unbounded(self):
        """Attack a Keras Iris classifier that has no clip values with FGM.

        With ``eps=1`` and no clipping, the adversarial samples must contain
        values above 1 and below 0, and at least one prediction must be wrong.
        """
        (_, _), (x_test, y_test) = self.iris
        bounded, _ = get_iris_classifier_kr()

        # Rebuild the classifier without clip values
        unbounded = KerasClassifier(model=bounded._model, use_logits=False, channel_index=1)
        fgm = FastGradientMethod(unbounded, eps=1)
        adv_samples = fgm.generate(x_test)
        self.assertFalse((x_test == adv_samples).all())
        self.assertTrue((adv_samples > 1).any())
        self.assertTrue((adv_samples < 0).any())

        adv_labels = np.argmax(unbounded.predict(adv_samples), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%',
                    accuracy * 100)
Ejemplo n.º 17
0
    def test_shared_model_mnist(self):
        """
        Adversarial training with a single FGSM attacker that shares its model with the trainer.

        A CNN is fitted on MNIST for 5 epochs, FGSM samples are crafted on it, and the same classifier is then
        adversarially trained. The evaluation on the adversarial samples after training must be greater than or
        equal to the one obtained before.

        :return: None
        """
        session = tf.Session()
        k.set_session(session)

        # Load MNIST and keep only the first NB_TRAIN / NB_TEST samples
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train = x_train[:NB_TRAIN]
        y_train = y_train[:NB_TRAIN]
        x_test = x_test[:NB_TEST]
        y_test = y_test[:NB_TEST]
        im_shape = x_train[0].shape

        # Create and fit classifier
        fit_settings = {'epochs': 5, 'batch_size': BATCH_SIZE}
        compile_settings = {
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy'],
        }
        classifier = CNN(im_shape, dataset='mnist')
        classifier.compile(compile_settings)
        classifier.fit(x_train, y_train, **fit_settings)

        # Create FGSM attacker and record the evaluation before adversarial training
        fgsm_attack = FastGradientMethod(classifier, session)
        x_adv = fgsm_attack.generate(x_test)
        score_before = classifier.evaluate(x_adv, y_test)

        # Perform adversarial training
        trainer = AdversarialTrainer(classifier, fgsm_attack)
        trainer.fit(x_train, y_train, **fit_settings)

        # Evaluate that accuracy on adversarial sample has improved
        score_after = trainer.classifier.evaluate(x_adv, y_test)
        self.assertTrue(score_after >= score_before)
Ejemplo n.º 18
0
        if args.adv_method == "rnd_fgsm":
            x_train = np.clip(
                X_train + alpha * np.sign(np.random.randn(*X_train.shape)),
                min_, max_)
            x_test = np.clip(
                X_test + alpha * np.sign(np.random.randn(*X_test.shape)), min_,
                max_)
            e = eps - alpha
        else:
            x_train = X_train
            x_test = X_test
            e = eps

        X_train_adv = adv_crafter.generate(x_val=x_train,
                                           eps=e,
                                           clip_min=min_,
                                           clip_max=max_)
        X_test_adv = adv_crafter.generate(x_val=x_test,
                                          eps=e,
                                          clip_min=min_,
                                          clip_max=max_)

        if args.save:
            np.save(os.path.join(SAVE_ADV, "eps%.2f_train.npy" % eps),
                    X_train_adv)
            np.save(os.path.join(SAVE_ADV, "eps%.2f_test.npy" % eps),
                    X_test_adv)

else:
    if args.adv_method == 'deepfool':
        adv_crafter = DeepFool(classifier,
Ejemplo n.º 19
0
# Wrap the model, loss and optimizer (all defined earlier in the script) in a
# PyTorchClassifier for 1x28x28 inputs and 10 classes, with values clipped to [0, 1].
mnist_classifier = PyTorchClassifier(clip_values=(0, 1), model=model, loss=criterion, optimizer=optimizer, 
                                     input_shape=(1, 28, 28), nb_classes=10)

# Train the classifier
mnist_classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)

# Measure the accuracy on the clean test set (fraction of matching argmax labels)
predictions = mnist_classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy before attack: {}%'.format(accuracy * 100))

# Time the crafting and saving of the adversarial examples
start = time.time()
# Craft the adversarial examples for both the test and the training set
epsilon = 0.2  # Maximum perturbation
adv_crafter = FastGradientMethod(mnist_classifier, eps=epsilon)
x_test_adv = adv_crafter.generate(x=x_test)
x_train_adv = adv_crafter.generate(x=x_train)
# NOTE(review): presumably the 'tensors/' directory must already exist -- confirm
torch.save(x_test_adv, 'tensors/x_test_adv.pt')
torch.save(x_train_adv, 'tensors/x_train_adv.pt')

# `start` and `end` are not used further in the visible part of the script
end = time.time()

# Test the classifier on adversarial examples
predictions = mnist_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy after attack: {}%'.format(accuracy * 100))

# Persist the classifier under the name 'mnist_fgsm_state_dict' in 'models'
mnist_classifier.save('mnist_fgsm_state_dict', 'models')
# print((mnist_classifier)) 
# torch.save(model.state_dict(), 'models/mnist_fgsm_state_dict')
# torch.save(x_test, 'tensors/test_imgs_mnist.pt')
Ejemplo n.º 20
0
    def test_binary_input_detector(self):
        """
        Test the binary input detector end-to-end.

        A small CNN is trained on MNIST and attacked with FGM; a second CNN with two outputs is trained to
        separate clean from adversarial inputs, and the test requires at least one true positive and one
        true negative on the test data.

        :return:
        """
        # Initialize a tf session
        session = tf.Session()
        k.set_session(session)

        # Get MNIST (test labels are not needed)
        nb_train, nb_test = 1000, 10
        (x_train, y_train), (x_test, _), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test = x_test[:NB_TEST]

        input_shape = x_train.shape[1:]
        nb_classes = 10

        # Create simple CNN
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(nb_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create classifier and train it:
        classifier = KerasClassifier((0, 1), model, use_logits=False)
        classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_clean = x_train[:nb_train]
        x_train_adv = attacker.generate(x_train_clean)
        x_test_adv = attacker.generate(x_test[:nb_test])

        # Compile training data for detector. The label count follows the number of
        # samples actually available: x_train was truncated to NB_TRAIN above, so a
        # fixed count of nb_train would mismatch the data whenever NB_TRAIN < nb_train.
        nb_detector = x_train_clean.shape[0]
        x_train_detector = np.concatenate((x_train_clean, x_train_adv), axis=0)
        y_train_detector = np.concatenate(
            (np.array([[1, 0]] * nb_detector), np.array([[0, 1]] * nb_detector)),
            axis=0)

        # Create a simple CNN for the detector.
        # Note: we use the same architecture as for the classifier, except for the number of outputs (=2)
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(2, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it:
        detector = BinaryInputDetector(
            KerasClassifier((0, 1), model, use_logits=False))
        detector.fit(x_train_detector,
                     y_train_detector,
                     nb_epochs=2,
                     batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector(x_test), axis=1)
        test_adv_detection = np.argmax(detector(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative:
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        self.assertTrue(nb_true_positives > 0)
        self.assertTrue(nb_true_negatives > 0)
from art.utils import load_dataset

# Get session
# Create a TensorFlow session and hand it to `k` (presumably the Keras
# backend module -- confirm against the file's imports) so that the model and
# the attack below run on the same session.
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
# load_dataset returns the train/test splits plus two extra values that are
# later passed to the attack as clip_min / clip_max.
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')
im_shape = x_train[0].shape

# Construct a convolutional neural network
comp_params = {'loss': 'categorical_crossentropy',
               'optimizer': 'adam',
               'metrics': ['accuracy']}
classifier = CNN(im_shape, act='relu', dataset='mnist')
classifier.compile(comp_params)
classifier.fit(x_train, y_train, validation_split=.1, epochs=5, batch_size=128)

# Evaluate the classifier on the test set
# scores[0] is printed as the loss and scores[1] as the accuracy.
# NOTE(review): the loss is formatted with a trailing '%' although it is not
# scaled like the accuracy -- confirm whether that is intended.
scores = classifier.evaluate(x_test, y_test)
print("\nTest loss: %.2f%%\nTest accuracy: %.2f%%" % (scores[0], scores[1] * 100))

# Craft adversarial samples with FGSM
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier, sess=session)
x_test_adv = adv_crafter.generate(x_val=x_test, eps=epsilon, clip_min=min_, clip_max=max_)

# Evaluate the classifier on the adversarial examples
# Same report as above, but on the perturbed inputs with the original labels.
scores = classifier.evaluate(x_test_adv, y_test)
print("\nTest loss: %.2f%%\nTest accuracy: %.2f%%" % (scores[0], scores[1] * 100))
Ejemplo n.º 22
0
              metrics=['accuracy'])

# Wrap the compiled Keras model and train it for five epochs.
classifier = KerasClassifier((min_, max_), model=model)
classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the classifier on the test set
# Labels are compared via argmax, i.e. y_test is treated as one-hot encoded.
preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy: %.2f%%" % (acc * 100))

# Craft adversarial samples

# ---------------- FGSM ----------------
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier)
x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon)
# Evaluate the classifier on the adversarial examples
# `preds` is rebound here: from now on it holds the adversarial predictions.
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
# Grab the examples to play with: `a` is a boolean mask selecting the test
# samples whose adversarial prediction differs from the true label.
a = (preds != np.argmax(y_test, axis=1))
nat_img = x_test[a]  # original (natural) images of the fooled samples
adv_img = x_test_adv[a]  # their adversarial counterparts
adv_nse = adv_img - nat_img  # perturbation added by the attack
adv_prd = preds[a]  # wrong labels predicted for the adversarial images
# compute variance and plot (some) example(s)
# Square root of the variance ratio, i.e. noise standard deviation relative
# to the standard deviation of the natural images.
adv_var = np.sqrt(np.var(adv_nse) / np.var(nat_img))
adv_plt = np.column_stack([
    nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28),
    adv_img[0].reshape(28, 28)
Ejemplo n.º 23
0
def robustness_check(object_storage_url,
                     object_storage_username,
                     object_storage_password,
                     data_bucket_name,
                     result_bucket_name,
                     model_id,
                     feature_testset_path='processed_data/X_test.npy',
                     label_testset_path='processed_data/y_test.npy',
                     clip_values=(0, 1),
                     nb_classes=2,
                     input_shape=(1, 3, 64, 64),
                     model_class_file='model.py',
                     model_class_name='model',
                     LossFn='',
                     Optimizer='',
                     epsilon=0.2):
    """Download a trained PyTorch model and report its FGSM robustness metrics.

    The test set, the weights and the zipped model source are fetched from
    object storage into the current working directory, the model class is
    imported from the extracted sources, and adversarial samples crafted with
    ``FastGradientMethod`` are scored with ``get_metrics``.

    :param object_storage_url: Storage endpoint; a leading ``http://`` or
        ``https://`` is stripped before it is passed to ``Minio``.
    :param object_storage_username: Access key for the object storage.
    :param object_storage_password: Secret key for the object storage.
    :param data_bucket_name: Bucket holding the test features and labels.
    :param result_bucket_name: Bucket holding ``<model_id>/model.pt`` and
        ``<model_id>/_submitted_code/model.zip``.
    :param model_id: Prefix of the model artefacts inside the result bucket.
    :param feature_testset_path: Object key of the ``.npy`` feature file.
    :param label_testset_path: Object key of the ``.npy`` label file.
    :param clip_values: Forwarded to ``PyTorchClassifier``.
    :param nb_classes: Forwarded to ``PyTorchClassifier``.
    :param input_shape: Forwarded to ``PyTorchClassifier``.
    :param model_class_file: File inside the zip that defines the model class;
        only the part before the first ``.`` is used, with ``-`` mapped to
        ``_`` to form the module name.
    :param model_class_name: Name of the model class inside that module.
    :param LossFn: Optional Python expression evaluated to build the loss; an
        empty string selects ``torch.nn.CrossEntropyLoss()``.
    :param Optimizer: Optional Python expression evaluated to build the
        optimizer; an empty string selects Adam with ``lr=0.001``.
    :param epsilon: ``eps`` given to ``FastGradientMethod``.
    :return: The first of the three values returned by ``get_metrics``.
    """
    # Minio is given the endpoint without its URL scheme.
    url = re.compile(r"https?://")
    cos = Minio(url.sub('', object_storage_url),
                access_key=object_storage_username,
                secret_key=object_storage_password)

    # Local file names the downloaded objects are stored under.
    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + '/_submitted_code/model.zip'

    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + '/' + weights_filename,
                    weights_filename)
    cos.fget_object(result_bucket_name, model_files, 'model.zip')

    # Load PyTorch model definition from the source code.
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile('model.zip', 'r') as zip_ref:
        zip_ref.extractall('model_files')

    modulename = 'model_files.' + model_class_file.split('.')[0].replace(
        '-', '_')
    # Users are required to state where the model class is located, or to
    # follow the naming convention provided (the defaults of
    # model_class_file / model_class_name).
    model_class = getattr(importlib.import_module(modulename),
                          model_class_name)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))

    # Define Loss and optimizer function for the PyTorch model
    # SECURITY NOTE(review): LossFn and Optimizer are evaluated as arbitrary
    # Python code and can reference locals such as `model` and `device`.
    # Only pass strings from a trusted source.
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # create pytorch classifier
    classifier = PyTorchClassifier(clip_values, model, loss_fn, optimizer,
                                   input_shape, nb_classes)

    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    # The two prediction arrays returned alongside the metrics are not used.
    metrics, _y_pred_orig, _y_pred_adv = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)
    return metrics
Ejemplo n.º 24
0
           input_shape=x_train.shape[1:]))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Wrap the compiled Keras model and train it for five epochs.
classifier = KerasClassifier((min_, max_), model=model)
classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the classifier on the test set
# Labels are compared via argmax, i.e. y_test is treated as one-hot encoded.
preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy: %.2f%%" % (acc * 100))

# Craft adversarial samples with FGSM
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier)
x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon)

# Evaluate the classifier on the adversarial examples
# The adversarial predictions must replace `preds`; scoring the stale clean
# predictions would simply report the clean accuracy a second time.
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
Ejemplo n.º 25
0
def main(argv):
    """Craft FGSM adversarial samples for a stored Keras model and report metrics.

    Recognised options are ``--data``, ``--networkdefinition``, ``--weights``
    (file names resolved relative to the ``DATA_DIR`` environment variable)
    and ``--epsilon`` (float). Options are read as flag/value pairs starting
    at ``argv[1]``; at most four pairs are examined. The parsed file names
    are stored in the module-level globals ``dataset_filename``,
    ``network_definition_filename`` and ``weights_filename``.

    The metrics are written as JSON to ``$RESULT_DIR/report.txt`` and the
    original/adversarial samples to ``$RESULT_DIR/adv_samples`` via
    ``np.savez``.

    :param argv: Command-line argument list, program name first.
    :raises SystemExit: If fewer than two arguments are given, a recognised
        flag has no value, or ``--epsilon`` is missing.
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    # epsilon has no default: it must come from the command line.
    epsilon = None
    known_flags = ("--data", "--networkdefinition", "--weights", "--epsilon")

    # Flags sit at odd positions 1, 3, 5, 7. Bounding the range by len(argv)
    # avoids the IndexError raised when fewer than four pairs are supplied.
    for i in range(1, min(len(argv), 9), 2):
        arg = str(argv[i])
        print(arg)
        if arg not in known_flags:
            continue
        if i + 1 >= len(argv):
            sys.exit("Missing value for argument " + arg)
        if arg == "--data":
            dataset_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        elif arg == "--networkdefinition":
            network_definition_filename = os.path.join(os.environ["DATA_DIR"],
                                                       str(argv[i + 1]))
        elif arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        else:  # "--epsilon"
            epsilon = float(argv[i + 1])

    # Fail early with a clear message instead of an UnboundLocalError when
    # the attack is constructed further down.
    if epsilon is None:
        sys.exit("Missing required argument --epsilon.")

    print("dataset : ", dataset_filename)
    print("network definition : ", network_definition_filename)
    print("weights : ", weights_filename)

    # load & compile model
    with open(network_definition_filename, 'r') as json_file:
        model_json = json_file.read()
    model = model_from_json(model_json)
    model.load_weights(weights_filename)
    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    model.compile(**comp_params)

    # create keras classifier
    classifier = KerasClassifier((0, 1), model)

    # load data set
    pf = np.load(dataset_filename)

    x = pf['x_test']
    y = pf['y_test']

    # pre-process numpy array: add a trailing axis, scale by 1/255 and
    # one-hot encode the labels into 10 classes
    x = np.expand_dims(x, axis=3)
    x = x.astype('float32') / 255

    y = np_utils.to_categorical(y, 10)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics = get_metrics(model, x, x_samples, y)

    print("metrics : ", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")

    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], 'adv_samples')
    print("adversarial samples saved to : ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
    # accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    # print('Accuracy after PGD-20 attack: {}%'.format(accuracy * 100))

    # PGD-100
    adv_crafter_pgd_100 = ProjectedGradientDescent(mnist_classifier, max_iter=100, batch_size=batch_size)

    x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array)

    # Test the classifier on adversarial exmaples
    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100))

    # FGSM
    adv_crafter_fgsm = FastGradientMethod(mnist_classifier, eps=epsilon, batch_size=batch_size)
    x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

    # Test the classifier on adversarial exmaples
    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

    # DeepFool
    adv_crafter_deepfool = CarliniLInfMethod(mnist_classifier, batch_size=batch_size)
    x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)

    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100))

    # C&W
    def _test_backend_mnist(self, classifier):
        """Run FGSM against ``classifier`` on MNIST with several settings.

        Covers the default attack, the ``minimal`` perturbation mode, and the
        L1 and L2 norms. Each scenario asserts that the adversarial samples
        differ from the clean ones and that the predicted labels are not all
        equal to the true labels, then logs the resulting accuracy.

        :param classifier: Classifier under attack; must provide ``predict``.
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        def predicted_labels(samples):
            # Labels derived from the classifier's predictions.
            return get_labels_np_array(classifier.predict(samples))

        def log_accuracy(message, y_pred, y_true):
            # Share of samples whose arg-max prediction matches the arg-max
            # of the true labels, logged as a percentage.
            hits = np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1)
            logger.info(message, np.sum(hits) / y_true.shape[0] * 100)

        # Test FGSM with np.inf norm
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test, batch_size=2)
        x_train_adv = attack.generate(x_train, batch_size=4)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = predicted_labels(x_train_adv)
        test_y_pred = predicted_labels(x_test_adv)

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        log_accuracy('Accuracy on adversarial train examples: %.2f%%',
                     train_y_pred, y_train)
        log_accuracy('Accuracy on adversarial test examples: %.2f%%',
                     test_y_pred, y_test)

        # Test minimal perturbations
        x_train_adv_min = attack.generate(x_train, minimal=True, eps_step=.1,
                                          eps_max=1.)
        x_test_adv_min = attack.generate(x_test, minimal=True, eps_step=.1,
                                         eps_max=1.)

        # The minimal-perturbation samples must differ both from the earlier
        # adversarial samples and from the clean data.
        self.assertFalse((x_train_adv_min == x_train_adv).all())
        self.assertFalse((x_test_adv_min == x_test_adv).all())

        self.assertFalse((x_train == x_train_adv_min).all())
        self.assertFalse((x_test == x_test_adv_min).all())

        train_y_pred = predicted_labels(x_train_adv_min)
        test_y_pred = predicted_labels(x_test_adv_min)

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        log_accuracy(
            'Accuracy on adversarial train examples with minimal perturbation: %.2f%%',
            train_y_pred, y_train)
        log_accuracy(
            'Accuracy on adversarial test examples with minimal perturbation: %.2f%%',
            test_y_pred, y_test)

        # L_1 norm, then L_2 norm: same checks on the test set only
        norm_scenarios = (
            (1, 'Accuracy on adversarial test examples with L1 norm: %.2f%%'),
            (2, 'Accuracy on adversarial test examples with L2 norm: %.2f%%'),
        )
        for norm, message in norm_scenarios:
            attack = FastGradientMethod(classifier, eps=1, norm=norm)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((x_test == x_test_adv).all())

            test_y_pred = predicted_labels(x_test_adv)
            self.assertFalse((y_test == test_y_pred).all())
            log_accuracy(message, test_y_pred, y_test)
Ejemplo n.º 28
0
def _write_result_row(ws, row, attack_name, params_text, test_acc, adv_acc,
                      flag, column_i):
    """Write one result row (attack name, parameters, accuracies) to ``ws``."""
    ws.write(row, 0, attack_name)
    ws.write(row, 1, params_text)
    if flag in ("ori", "adv"):
        # Both flags use the same fixed accuracy columns.
        ws.write(row, 3, test_acc)
        ws.write(row, 4, adv_acc)
    else:
        # Any other flag places the pair in a column group chosen by column_i.
        ws.write(row, 3 * column_i, test_acc)
        ws.write(row, 3 * column_i + 1, adv_acc)


def _adversarial_accuracy(model, x_adv, y_true):
    """Return element 1 of ``model.evaluate`` on the adversarial samples."""
    # Presumes the model was compiled with accuracy as its first metric, so
    # that index 1 is the accuracy -- confirm against the caller.
    return model.evaluate(x_adv, y_true, verbose=0)[1]


def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws,
               current_line, attack_name, flag, column_i):
    """Attack ``model`` over a fixed parameter grid and record accuracies.

    For the selected attack every parameter combination of its built-in grid
    is used to craft adversarial samples from ``x_test[classify_idx_lst]``.
    The model is evaluated on them and one row per combination is written to
    ``ws``. For the two Carlini & Wagner attacks the written accuracy is the
    mean over ten targeted runs, one per target class 0-9.

    :param x_test: Test inputs.
    :param y_test: Test labels, indexed the same way as ``x_test``.
    :param classify_idx_lst: Index selecting the samples to attack.
    :param model: Keras model; wrapped in a ``KerasClassifier`` for the
        attacks and evaluated directly via ``model.evaluate``.
    :param test_acc: Clean accuracy value written next to each adversarial
        accuracy.
    :param ws: Worksheet-like object exposing ``write(row, col, value)``.
    :param current_line: Row index of the first row to write.
    :param attack_name: One of ``"FGM"``, ``"BIM"``, ``"JSMA"``,
        ``"DeepFool"``, ``"CW-L2"``, ``"CW-Linf"``; any other value writes
        nothing.
    :param flag: ``"ori"`` or ``"adv"`` write accuracies to columns 3 and 4;
        any other value uses columns ``3 * column_i`` and ``3 * column_i + 1``.
    :param column_i: Column-group index used for the latter case.
    :return: Tuple ``(ws, current_line)`` where ``current_line`` has advanced
        by one per written row plus one trailing spacer row.
    """
    classifier = KerasClassifier((0., 1.), model=model)

    if attack_name == "FGM":
        # ---------------------- parameter settings ---------------------- #
        # Each entry: [maximum perturbation, order of the norm]
        parameter_lst = [
            [10, 1], [20, 1], [30, 1], [40, 1], [50, 1],
            [60, 1], [70, 1], [80, 1], [90, 1], [100, 1],
            [1, 2], [2, 2], [3, 2], [4, 2], [5, 2],
            [6, 2], [7, 2], [8, 2], [9, 2], [10, 2],
            [0.05, np.inf], [0.10, np.inf], [0.15, np.inf],
            [0.20, np.inf], [0.25, np.inf], [0.30, np.inf],
            [0.35, np.inf], [0.40, np.inf], [0.45, np.inf],
            [0.50, np.inf],
        ]
        # ------------------------ run the attack ------------------------ #
        for epsilon, norm_type in parameter_lst:
            adv_crafter = FastGradientMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              eps=epsilon,
                                              norm=norm_type)
            acc = _adversarial_accuracy(model, x_test_adv,
                                        y_test[classify_idx_lst])
            params_text = ("(" + str(round(epsilon, 4)) + ", " +
                           str(norm_type) + ")")
            _write_result_row(ws, current_line, attack_name, params_text,
                              test_acc, acc, flag, column_i)
            current_line += 1

    elif attack_name == "BIM":
        # ---------------------- parameter settings ---------------------- #
        # Each entry: [order of the norm, maximum perturbation,
        #              step size per iteration, maximum number of iterations]
        parameter_lst = [
            [1, 20.0, 2.0, 10], [1, 20.0, 4.0, 10], [1, 20.0, 6.0, 10],
            [1, 20.0, 8.0, 10], [1, 20.0, 10.0, 10],
            [1, 20.0, 2.0, 50], [1, 20.0, 4.0, 50], [1, 20.0, 6.0, 50],
            [1, 20.0, 8.0, 50], [1, 20.0, 10.0, 50],
            [2, 2.0, 0.2, 10], [2, 2.0, 0.4, 10], [2, 2.0, 0.6, 10],
            [2, 2.0, 0.8, 10], [2, 2.0, 1.0, 10],
            [2, 2.0, 0.2, 50], [2, 2.0, 0.4, 50], [2, 2.0, 0.6, 50],
            [2, 2.0, 0.8, 50], [2, 2.0, 1.0, 50],
            [np.inf, 0.1, 0.002, 10], [np.inf, 0.1, 0.004, 10],
            [np.inf, 0.1, 0.006, 10], [np.inf, 0.1, 0.008, 10],
            [np.inf, 0.1, 0.010, 10],
            [np.inf, 0.1, 0.002, 50], [np.inf, 0.1, 0.004, 50],
            [np.inf, 0.1, 0.006, 50], [np.inf, 0.1, 0.008, 50],
            [np.inf, 0.1, 0.010, 50],
        ]
        # ------------------------ run the attack ------------------------ #
        for norm_type, epsilon, epsilon_step, max_iteration in parameter_lst:
            adv_crafter = BasicIterativeMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              norm=norm_type,
                                              eps=epsilon,
                                              eps_step=epsilon_step,
                                              max_iter=max_iteration)
            acc = _adversarial_accuracy(model, x_test_adv,
                                        y_test[classify_idx_lst])
            params_text = ("(" + str(norm_type) + ", " +
                           str(round(epsilon, 4)) + ", " +
                           str(round(epsilon_step, 4)) + ", " +
                           str(max_iteration) + ")")
            _write_result_row(ws, current_line, attack_name, params_text,
                              test_acc, acc, flag, column_i)
            current_line += 1

    elif attack_name == "JSMA":
        # ---------------------- parameter settings ---------------------- #
        # Each entry: [theta, gamma]
        # theta: perturbation introduced to each modified feature per step
        #        (can be positive or negative).
        # gamma: maximum percentage of perturbed features (between 0 and 1).
        parameter_lst = [
            [0.5, 0.5], [0.4, 0.5], [0.3, 0.5], [0.2, 0.5], [0.1, 0.5],
            [-0.1, 0.5], [-0.2, 0.5], [-0.3, 0.5], [-0.4, 0.5], [-0.5, 0.5],
        ]
        # ------------------------ run the attack ------------------------ #
        for theta, gamma in parameter_lst:
            adv_crafter = SaliencyMapMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              theta=theta,
                                              gamma=gamma)
            acc = _adversarial_accuracy(model, x_test_adv,
                                        y_test[classify_idx_lst])
            params_text = ("(" + str(round(theta, 4)) + ", " +
                           str(round(gamma, 4)) + ")")
            _write_result_row(ws, current_line, attack_name, params_text,
                              test_acc, acc, flag, column_i)
            current_line += 1

    elif attack_name == "DeepFool":
        # ---------------------- parameter settings ---------------------- #
        # Each entry: [maximum number of iterations, overshoot parameter]
        parameter_lst = [
            [2, 0.10], [4, 0.10], [6, 0.10], [8, 0.10], [10, 0.10],
            [12, 0.10], [14, 0.10], [16, 0.10], [18, 0.10], [20, 0.10],
        ]
        # ------------------------ run the attack ------------------------ #
        for max_iteration, epsilon in parameter_lst:
            adv_crafter = DeepFool(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              max_iter=max_iteration,
                                              epsilon=epsilon)
            acc = _adversarial_accuracy(model, x_test_adv,
                                        y_test[classify_idx_lst])
            params_text = ("(" + str(max_iteration) + ", " +
                           str(round(epsilon, 4)) + ")")
            _write_result_row(ws, current_line, attack_name, params_text,
                              test_acc, acc, flag, column_i)
            current_line += 1

    elif attack_name in ("CW-L2", "CW-Linf"):
        # The two Carlini & Wagner variants share grid and procedure; only
        # the attack class differs.
        if attack_name == "CW-L2":
            attack_cls = CarliniL2Method
        else:
            attack_cls = CarliniLInfMethod
        # ---------------------- parameter settings ---------------------- #
        # Each entry: [confidence, maximum number of iterations]
        # confidence: a higher value produces examples that are farther away
        #             from the original input, but classified with higher
        #             confidence as the target class.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # ------------------------ run the attack ------------------------ #
        for confidence_value, max_iter_value in parameter_lst:
            adv_crafter = attack_cls(classifier)
            sum_adv_acc = 0
            # One targeted run per class label 0-9; accuracies are averaged.
            for adv_label in range(0, 10):
                one_hot_label = [0] * 10
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] *
                               x_test[classify_idx_lst].shape[0]))
                sum_adv_acc += _adversarial_accuracy(
                    model, x_test_adv, y_test[classify_idx_lst])
            params_text = ("(" + str(round(confidence_value, 4)) + ", " +
                           str(max_iter_value) + ")")
            _write_result_row(ws, current_line, attack_name, params_text,
                              test_acc, sum_adv_acc / 10, flag, column_i)
            current_line += 1

    # Leave one empty row after this attack's results.
    current_line += 1
    return ws, current_line
           input_shape=x_train.shape[1:]))
# Remaining layers of the CNN: a second convolution, pooling, dropout, a
# dense hidden layer and a 10-way softmax output.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Wrap the compiled Keras model and train it for five epochs.
classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the classifier on the test set
# Labels are compared via argmax, i.e. y_test is treated as one-hot encoded.
preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy: %.2f%%" % (acc * 100))

# Craft adversarial samples with FGSM
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier, eps=epsilon)
x_test_adv = adv_crafter.generate(x=x_test)

# Evaluate the classifier on the adversarial examples
# `preds` is rebound to the adversarial predictions before scoring.
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
Ejemplo n.º 30
0
    def attack(self, model=None, attack_str=""):
        """Return adversarial images for the configured attack.

        Previously stored images are returned as soon as ``_load_images``
        finds them. Otherwise the configured attack (``FGSM``, any name
        starting with ``CW``, ``BIM`` or ``DEEPFOOL``) is run on the selected
        test or validation samples, and the result is pickled into
        ``self._attack_dir`` before being returned.

        :param model: Keras model to attack. When ``None`` the surrogate
            model is first fitted on the training data and attacked instead.
        :param attack_str: Forwarded to ``_load_images`` to look up stored
            adversarial images.
        :return: The loaded or freshly crafted adversarial images.
        :raises ValueError: If the configured attack is not supported, or if
            BIM is requested for a dataset other than ``MNIST`` / ``CIFAR``.
        """
        imgs = self._load_images(attack_str, self._test_or_val_dataset)

        if self._test_or_val_dataset == "_x_test_set_":
            X = self.__data.x_test
        else:
            X = self.__data.x_val

        if imgs is not None:
            print('\n{0} adversarial examples using {1} attack loaded...\n'.
                  format(self.__dataset, self.__attack))
            return imgs

        if model is None:
            # No model supplied: train the surrogate and attack that. The
            # return value of fit() is not the model, so it is not kept.
            self.surrogate_model.fit(self.__data.x_train,
                                     self.__data.y_train,
                                     verbose=1,
                                     epochs=self.__epochs,
                                     batch_size=128)
            wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
        else:
            wrap = KerasClassifier((0., 1.), model=model)

        # Samples every attack below is run on.
        samples = X[self.idx_adv][:self._length]

        if self.__attack == 'FGSM':
            print('\nCrafting adversarial examples using FGSM attack...\n')
            fgsm = FastGradientMethod(wrap)

            # MNIST uses a larger perturbation budget than other datasets.
            if self.__data.dataset_name == 'MNIST':
                x_adv_images = fgsm.generate(x=samples, eps=0.2)
            else:
                x_adv_images = fgsm.generate(x=samples, eps=0.025)
            file_suffix = "fgsm.pkl"

        elif self.__attack.startswith("CW"):
            print('\nCrafting adversarial examples using CW attack...\n')
            cw = CarliniL2Method(wrap,
                                 confidence=0.0,
                                 targeted=False,
                                 binary_search_steps=1,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 max_iter=100)
            x_adv_images = cw.generate(samples)
            file_suffix = "cw.pkl"

        elif self.__attack == 'BIM':
            print('\nCrafting adversarial examples using BIM attack...\n')

            if self.__dataset == 'MNIST':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.25,
                                           eps_step=0.2,
                                           max_iter=100,
                                           norm=np.inf)
            elif self.__dataset == 'CIFAR':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.025,
                                           eps_step=0.01,
                                           max_iter=1000,
                                           norm=np.inf)
            else:
                # Without this, `bim` would be unbound on the next line.
                raise ValueError(
                    "BIM attack is not configured for dataset {0!r}".format(
                        self.__dataset))

            x_adv_images = bim.generate(x=samples)
            file_suffix = "bim.pkl"

        elif self.__attack == 'DEEPFOOL':
            print('\nCrafting adversarial examples using DeepFool attack...\n')

            deepfool = DeepFool(wrap)
            x_adv_images = deepfool.generate(x=samples)
            file_suffix = "deepfool.pkl"

        else:
            # Without this, the final return would hit an unbound variable.
            raise ValueError("Unsupported attack: {0!r}".format(self.__attack))

        # Store the crafted images so later calls can load them instead.
        path = os.path.join(
            self._attack_dir,
            self.__dataset.lower() + self._test_or_val_dataset + file_suffix)
        helpers.save_pkl(x_adv_images, path)

        return x_adv_images