def test_subsetscan_detector(self):
        """End-to-end test of SubsetScanningDetector on MNIST with FGM adversarial samples.

        Scanning clean vs. clean data should yield chance-level detection power
        (0.5); scanning clean vs. adversarial data should score above chance.
        """
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier, _ = get_classifier_kr()

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.5)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Compile training data for detector:
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)

        bgd = x_train  # background (clean) data the detector scans against
        clean = x_test
        anom = x_test_adv

        # Detector scans the classifier's activations at layer 1.
        detector = SubsetScanningDetector(classifier, bgd, layer=1)

        # Clean vs. clean: detection power should be exactly chance level.
        _, _, dpwr = detector.scan(clean, clean)
        self.assertAlmostEqual(dpwr, 0.5)

        # Clean vs. adversarial: detection power should exceed chance.
        _, _, dpwr = detector.scan(clean, anom)
        self.assertGreater(dpwr, 0.5)

        # Mixed clean+adversarial scan; 85/15 are presumably scan window/size
        # arguments — TODO confirm against SubsetScanningDetector.scan() signature.
        _, _, dpwr = detector.scan(clean, x_train_detector, 85, 15)
        self.assertGreater(dpwr, 0.5)
def fgsm(clf, x_train, x_test, epsilon=0.1):
    """Craft FGSM adversarial examples for the given train and test sets.

    :param clf: ART-compatible classifier to attack.
    :param x_train: Training inputs to perturb.
    :param x_test: Test inputs to perturb.
    :param epsilon: Maximum perturbation. (Bug fix: this argument was
        previously overridden by a hard-coded `epsilon = .1`, so callers
        could never change the attack strength.)
    :return: Tuple ``(x_train_fgsm_adv, x_test_fgsm_adv)``.
    """
    from art.attacks.fast_gradient import FastGradientMethod
    fgsm_adv_crafter = FastGradientMethod(clf, eps=epsilon)
    x_test_fgsm_adv = fgsm_adv_crafter.generate(x=x_test)
    x_train_fgsm_adv = fgsm_adv_crafter.generate(x=x_train)
    return x_train_fgsm_adv, x_test_fgsm_adv
    def test_with_defences(self):
        """FGM (eps=1) against a Keras MNIST classifier wrapped with the
        'featsqueeze1' defence; the attack should still perturb inputs and
        degrade predictions despite feature squeezing."""
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Get the ready-trained Keras model
        model = self.classifier_k._model
        # Re-wrap the raw model with the string-configured feature-squeezing defence.
        classifier = KerasClassifier((0, 1), model, defences='featsqueeze1')

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # Inputs must actually have been perturbed.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_train, axis=1)) / y_train.shape[0]
        print(
            '\nAccuracy on adversarial train examples with feature squeezing: %.2f%%'
            % (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]
        print('\naccuracy on adversarial test examples: %.2f%%' % (acc * 100))
Ejemplo n.º 4
0
    def test_with_defences(self):
        """FGM (eps=1) against a feature-squeezed Keras classifier that is only
        accessible through the query-efficient black-box gradient estimator."""
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the ready-trained Keras model
        model = self.classifier_k._model
        # 1-bit feature squeezing as a preprocessing defence.
        fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
        classifier = KerasClassifier(model=model, clip_values=(0, 1), defences=fs)
        # Wrap the classifier
        classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64., round_samples=1 / 255.)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # Inputs must actually have been perturbed.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%',
                    (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%',
                    (acc * 100))
def main(argv):
    """Craft FGSM adversarial samples against a PyTorch CNN and save metrics.

    Expects flag/value pairs in ``argv``: --datax, --datay, --weights,
    --epsilon. Input files are resolved under $DATA_DIR; the metrics report
    and the adversarial samples are written under $RESULT_DIR.
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    # Walk flag/value pairs. Bound the loop by the actual argv length rather
    # than the previous hard-coded `while i <= 8`, which raised IndexError
    # whenever fewer than four flag/value pairs were supplied.
    i = 1
    while i + 1 < len(argv):
        arg = str(argv[i])
        print(arg)
        if arg == "--datax":
            dataset_filenamex = os.path.join(os.environ["DATA_DIR"],
                                             str(argv[i + 1]))
        if arg == "--datay":
            dataset_filenamey = os.path.join(os.environ["DATA_DIR"],
                                             str(argv[i + 1]))
        if arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        if arg == "--epsilon":
            epsilon = float(argv[i + 1])

        i += 2

    print("dataset_x:", dataset_filenamex)
    print("dataset_y:", dataset_filenamey)
    print("weights:", weights_filename)

    # Load the trained weights and move the network to GPU when available.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = ThreeLayerCNN().to(device)
    model.load_state_dict(torch.load(weights_filename))
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Wrap the model as an ART classifier: clip range (0, 1),
    # input shape (1, 3, 64, 64), 2 output classes.
    classifier = PyTorchClassifier((0, 1), model, loss_fn, optimizer,
                                   (1, 3, 64, 64), 2)

    # Load the data set (features as .npy, labels as plain text).
    x = np.load(dataset_filenamex)
    y = np.loadtxt(dataset_filenamey)

    # Craft adversarial samples using FGSM.
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # Obtain all metrics (robustness score, perturbation metric, reduction in confidence).
    metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")

    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], "adv_samples")
    print("adversarial samples saved to: ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
Ejemplo n.º 6
0
    def test_without_defences(self):
        """FGM (eps=1) through the query-efficient black-box gradient
        estimator, with no input defences attached."""
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the ready-trained Keras model and wrap it in query efficient gradient estimator wrapper
        classifier = QueryEfficientBBGradientEstimation(self.classifier_k, 20, 1 / 64., round_samples=1 / 255.)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # Inputs must actually have been perturbed.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples with limited query info: %.2f%%',
                    (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples with limited query info: %.2f%%', (acc * 100))
Ejemplo n.º 7
0
    def _test_with_defences(self, custom_activation=False):
        """FGM (eps=1) against a Keras classifier with the 'featsqueeze1' defence.

        :param custom_activation: Forwarded to KerasClassifier; presumably toggles
            custom-activation handling — TODO confirm against KerasClassifier docs.
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the ready-trained Keras model
        model = self.classifier_k._model
        classifier = KerasClassifier((0, 1), model, defences='featsqueeze1', custom_activation=custom_activation)

        attack = FastGradientMethod(classifier, eps=1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # Inputs must actually have been perturbed.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples with feature squeezing: %.2f%%', (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%', (acc * 100))
    def test_with_preprocessing(self):
        """FGM against a CNN with the 'featsqueeze1' preprocessing defence.

        NOTE(review): uses an older session-based ART API (CNN wrapper, attack
        constructed with a tf session, params passed to generate()) than the
        other tests in this file — confirm the target ART version.
        """

        session = tf.Session()
        k.set_session(session)

        comp_params = {
            "loss": 'categorical_crossentropy',
            "optimizer": 'adam',
            "metrics": ['accuracy']
        }

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 100
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu", defences=["featsqueeze1"])
        classifier.compile(comp_params)
        classifier.fit(X_train,
                       Y_train,
                       epochs=1,
                       batch_size=batch_size,
                       verbose=0)
        scores = classifier.evaluate(X_train, Y_train)
        print("\naccuracy on training set: %.2f%%" % (scores[1] * 100))
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        attack_params = {
            "verbose": 0,
            "clip_min": 0.,
            "clip_max": 1.,
            "eps": 1.
        }

        attack = FastGradientMethod(classifier, session)
        X_train_adv = attack.generate(X_train, **attack_params)
        X_test_adv = attack.generate(X_test, **attack_params)

        # Inputs must actually have been perturbed.
        self.assertFalse((X_train == X_train_adv).all())
        self.assertFalse((X_test == X_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(X_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(X_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((Y_train == train_y_pred).all())
        self.assertFalse((Y_test == test_y_pred).all())

        scores = classifier.evaluate(X_train_adv, Y_train)
        print('\naccuracy on adversarial train examples: %.2f%%' %
              (scores[1] * 100))

        scores = classifier.evaluate(X_test_adv, Y_test)
        print('\naccuracy on adversarial test examples: %.2f%%' %
              (scores[1] * 100))
Ejemplo n.º 9
0
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test cast check if accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        session = tf.Session()
        k.set_session(session)

        # Load MNIST and trim to the configured train/test sizes.
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
        im_shape = x_train[0].shape

        # Create and fit target classifier
        comp_params = {
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy']
        }
        params = {'epochs': 5, 'batch_size': BATCH_SIZE}
        classifier_tgt = CNN(im_shape, dataset='mnist')
        classifier_tgt.compile(comp_params)
        classifier_tgt.fit(x_train, y_train, **params)

        # Create and fit source classifier. Bug fix: this previously called
        # classifier_tgt.fit() again, leaving the source classifier untrained.
        classifier_src = CNN(im_shape, dataset='mnist')
        classifier_src.compile(comp_params)
        classifier_src.fit(x_train, y_train, **params)

        # Create FGSM and DeepFool attackers on the source model.
        adv1 = FastGradientMethod(classifier_src, session)
        adv2 = DeepFool(classifier_src, session)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        acc = classifier_tgt.evaluate(x_adv, y_adv)

        # Perform adversarial training
        adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
        adv_trainer.fit(x_train, y_train, **params)

        # Evaluate that accuracy on adversarial sample has improved
        acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
        self.assertTrue(acc_adv_trained >= acc)
    def test_binary_activation_detector(self):
        """
        Test the binary activation detector end-to-end.
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier, _ = get_classifier_kr()

        # Generate adversarial samples. The data was already trimmed above, so
        # the previous re-slices were redundant — and x_test[:NB_TRAIN] was a
        # likely typo (test data sliced with the train size).
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Compile training data for detector: clean samples labelled [1, 0],
        # adversarial samples labelled [0, 1].
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)
        y_train_detector = np.concatenate((np.array([[1, 0]] * NB_TRAIN), np.array([[0, 1]] * NB_TRAIN)), axis=0)

        # Create a simple CNN for the detector, operating on the classifier's
        # layer-0 activations.
        activation_shape = classifier.get_activations(x_test[:1], 0).shape[1:]
        number_outputs = 2
        model = Sequential()
        model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
        model.add(Flatten())
        model.add(Dense(number_outputs, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it.
        # Detector consider activations at layer=0:
        detector = BinaryActivationDetector(classifier=classifier,
                                            detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False),
                                            layer=0)
        detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

        # Apply detector on clean and adversarial test data:
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        logger.debug('Number of true positives detected: %i', nb_true_positives)
        logger.debug('Number of true negatives detected: %i', nb_true_negatives)
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
Ejemplo n.º 11
0
    def test_iris_clipped(self):
        """Untargeted FGM on Iris through a query-efficient gradient estimator;
        adversarial outputs must stay inside the [0, 1] clip range."""
        (_, _), (x_test, y_test) = self.iris

        classifier, _ = get_iris_classifier_kr()
        classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64., round_samples=1 / 255.)

        # Craft untargeted adversarial examples and verify they were actually
        # perturbed while remaining inside the clip range.
        x_test_adv = FastGradientMethod(classifier, eps=.1).generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # The attack should flip at least one prediction.
        adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
Ejemplo n.º 12
0
    def test_iris_unbounded(self):
        """FGM (eps=1) on Iris with an unclipped classifier wrapped in a
        query-efficient gradient estimator: perturbations may leave [0, 1]."""
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values, then wrap it in the
        # query-efficient black-box gradient estimator.
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
        classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64., round_samples=1 / 255.)

        x_test_adv = FastGradientMethod(classifier, eps=1).generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        # Without clipping, some values should escape the unit interval.
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
    def test_iris_k_unbounded(self):
        """FGM (eps=1) on an unclipped Keras Iris classifier: adversarial
        values may escape the [0, 1] interval."""
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values.
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        x_test_adv = FastGradientMethod(classifier, eps=1).generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        # No clipping, so perturbed values should leave the unit interval.
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%',
                    (accuracy * 100))
Ejemplo n.º 14
0
    def _test_mnist_targeted(self, classifier):
        """Targeted FGM on MNIST, steering each sample toward its second-most-likely class.

        Uses minimal-perturbation mode; at least half of the attacks are
        expected to reach their target label.

        :param classifier: ART-compatible classifier under attack.
        """
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # Test FGSM with np.inf norm
        attack = FastGradientMethod(classifier, eps=1.0, targeted=True)

        # Build one-hot targets from each sample's second-highest predicted class.
        pred_sort = classifier.predict(x_test).argsort(axis=1)
        y_test_adv = np.zeros((x_test.shape[0], 10))
        for i in range(x_test.shape[0]):
            y_test_adv[i, pred_sort[i, -2]] = 1.0

        # minimal=True searches per-sample for the smallest eps in eps_step increments.
        x_test_adv = attack.generate(x_test, minimal=True, eps_step=0.01, eps=1.0, y=y_test_adv)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertEqual(y_test_adv.shape, test_y_pred.shape)
        # At least half of the samples should be classified as the target label.
        self.assertTrue((y_test_adv == test_y_pred).sum() >= x_test.shape[0] // 2)
Ejemplo n.º 15
0
    def test_shared_model_mnist(self):
        """
        Test the adversarial trainer using one FGSM attacker. The source and target models of the attack are the same
        CNN on MNIST trained for 5 epochs. The test cast check if accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        session = tf.Session()
        k.set_session(session)

        # Load MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train, x_test, y_test = x_train[:
                                                   NB_TRAIN], y_train[:
                                                                      NB_TRAIN], x_test[:
                                                                                        NB_TEST], y_test[:
                                                                                                         NB_TEST]
        im_shape = x_train[0].shape

        # Create and fit classifier
        params = {'epochs': 5, 'batch_size': BATCH_SIZE}
        classifier = CNN(im_shape, dataset='mnist')
        classifier.compile({
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy']
        })
        classifier.fit(x_train, y_train, **params)

        # Create FGSM attacker and record baseline accuracy on its samples.
        adv = FastGradientMethod(classifier, session)
        x_adv = adv.generate(x_test)
        acc = classifier.evaluate(x_adv, y_test)

        # Perform adversarial training
        adv_trainer = AdversarialTrainer(classifier, adv)
        adv_trainer.fit(x_train, y_train, **params)

        # Evaluate that accuracy on adversarial sample has improved
        acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_test)
        self.assertTrue(acc_adv_trained >= acc)
    def _test_with_defences(self, custom_activation=False):
        """Batched FGM (eps=1) against a Keras classifier with a 1-bit
        feature-squeezing defence.

        :param custom_activation: Forwarded to KerasClassifier; presumably toggles
            custom-activation handling — TODO confirm against KerasClassifier docs.
        """
        from art.defences import FeatureSqueezing

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the ready-trained Keras model
        model = self.classifier_k._model
        fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
        classifier = KerasClassifier(model=model,
                                     clip_values=(0, 1),
                                     defences=fs,
                                     custom_activation=custom_activation)

        attack = FastGradientMethod(classifier, eps=1, batch_size=128)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # Inputs must actually have been perturbed.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        preds = classifier.predict(x_train_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_train, axis=1)) / y_train.shape[0]
        logger.info(
            'Accuracy on MNIST with FGM adversarial train examples with feature squeezing: %.2f%%',
            (acc * 100))

        preds = classifier.predict(x_test_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on MNIST with FGM adversarial test examples: %.2f%%',
            (acc * 100))
    def test_iris_pt(self):
        """Untargeted and targeted FGM on Iris against a PyTorch classifier."""
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        # Test untargeted attack
        attack = FastGradientMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        # Perturbed samples must remain inside the [0, 1] clip range.
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%',
                    (acc * 100))

        # Test targeted attack against random target labels (3 classes).
        targets = random_targets(y_test, nb_classes=3)
        attack = FastGradientMethod(classifier,
                                    targeted=True,
                                    eps=.1,
                                    batch_size=128)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # At least one sample should reach its target class.
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted FGM on Iris: %.2f%%',
                    (acc * 100))
    def _test_backend_mnist(self, classifier):
        """Exercise FGM on MNIST across batching, minimal perturbation,
        and L1/L2/L-inf norms.

        :param classifier: ART-compatible classifier under attack.
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Test FGSM with np.inf norm
        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test, **{'batch_size': 2})
        x_train_adv = attack.generate(x_train, **{'batch_size': 4})

        # Inputs must actually have been perturbed.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # At least some predictions must change under attack.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (acc * 100))

        # Test minimal perturbations
        # NOTE(review): "eps_max" is not a recognised FGM parameter in current
        # ART ("eps" is the cap) — confirm it is not silently ignored here.
        attack_params = {"minimal": True, "eps_step": .1, "eps_max": 1.}

        x_train_adv_min = attack.generate(x_train, **attack_params)
        x_test_adv_min = attack.generate(x_test, **attack_params)

        # Minimal perturbations should differ from the full-eps ones.
        self.assertFalse((x_train_adv_min == x_train_adv).all())
        self.assertFalse((x_test_adv_min == x_test_adv).all())

        self.assertFalse((x_train == x_train_adv_min).all())
        self.assertFalse((x_test == x_test_adv_min).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv_min))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv_min))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info(
            'Accuracy on adversarial train examples with minimal perturbation: %.2f%%',
            (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on adversarial test examples with minimal perturbation: %.2f%%',
            (acc * 100))

        # L_1 norm
        attack = FastGradientMethod(classifier, eps=1, norm=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())
        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on adversarial test examples with L1 norm: %.2f%%',
            (acc * 100))

        # L_2 norm
        attack = FastGradientMethod(classifier, eps=1, norm=2)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())
        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on adversarial test examples with L2 norm: %.2f%%',
            (acc * 100))
Ejemplo n.º 19
0
    def test_binary_input_detector(self):
        """
        Test the binary input detector end-to-end.
        :return:
        """
        # Initialize a tf session
        session = tf.Session()
        k.set_session(session)

        # Get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        input_shape = x_train.shape[1:]
        nb_classes = 10

        # Create simple CNN
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(nb_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create classifier and train it:
        classifier = KerasClassifier((0, 1), model, use_logits=False)
        classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        # Generate adversarial samples:
        attacker = FastGradientMethod(classifier, eps=0.1)
        x_train_adv = attacker.generate(x_train[:nb_train])
        x_test_adv = attacker.generate(x_test[:nb_test])

        # Compile training data for detector: clean samples labelled [1, 0],
        # adversarial samples labelled [0, 1].
        x_train_detector = np.concatenate((x_train[:nb_train], x_train_adv),
                                          axis=0)
        y_train_detector = np.concatenate(
            (np.array([[1, 0]] * nb_train), np.array([[0, 1]] * nb_train)),
            axis=0)

        # Create a simple CNN for the detector.
        # Note: we use the same architecture as for the classifier, except for the number of outputs (=2)
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(2, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it:
        detector = BinaryInputDetector(
            KerasClassifier((0, 1), model, use_logits=False))
        detector.fit(x_train_detector,
                     y_train_detector,
                     nb_epochs=2,
                     batch_size=128)

        # Apply detector on clean and adversarial test data. Bug fix: the
        # detector object is not callable — use predict(), as in the
        # activation-detector test.
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative:
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        self.assertTrue(nb_true_positives > 0)
        self.assertTrue(nb_true_negatives > 0)
                                                   eps_step=0.01,
                                                   max_iter=100,
                                                   batch_size=batch_size)

    x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array)

    # Test the classifier on adversarial exmaples
    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100))

    # FGSM
    adv_crafter_fgsm = FastGradientMethod(cifar_classifier,
                                          eps=epsilon,
                                          eps_step=0.01,
                                          batch_size=batch_size)
    x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

    # Test the classifier on adversarial exmaples
    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

    # Deepfool
    adv_crafter_deepfool = DeepFool(cifar_classifier, batch_size=batch_size)
    x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)

    predictions = cifar_classifier.predict(x_test_adv)
Ejemplo n.º 21
0
def main(argv):
    """Evaluate a Keras model's robustness by crafting FGSM adversarial samples.

    Parses ``--data``, ``--networkdefinition``, ``--weights`` and
    ``--epsilon`` flag/value pairs from *argv*, loads and compiles the model,
    crafts FGSM adversarial samples for the test split, writes the robustness
    metrics to ``$RESULT_DIR/report.txt`` and saves the crafted samples to
    ``$RESULT_DIR/adv_samples``.

    :param argv: ``sys.argv``-style list — program name followed by
        "--flag value" pairs.
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    # Default perturbation strength; the original left `epsilon` unbound and
    # crashed with NameError at the FastGradientMethod call whenever the
    # --epsilon flag was not supplied.
    epsilon = 0.1

    # Walk the "--flag value" pairs.  The original `while i <= 8` assumed
    # exactly four pairs and raised IndexError for shorter argument lists.
    i = 1
    while i + 1 < len(argv):
        arg = str(argv[i])
        print(arg)
        if arg == "--data":
            dataset_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        elif arg == "--networkdefinition":
            network_definition_filename = os.path.join(os.environ["DATA_DIR"],
                                                       str(argv[i + 1]))
        elif arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"],
                                            str(argv[i + 1]))
        elif arg == "--epsilon":
            epsilon = float(argv[i + 1])

        i += 2

    print("dataset : ", dataset_filename)
    print("network definition : ", network_definition_filename)
    print("weights : ", weights_filename)

    # Load the architecture from JSON and the weights, then compile.  `with`
    # guarantees the handle is closed (the original leaked it on a read error).
    with open(network_definition_filename, 'r') as json_file:
        model_json = json_file.read()
    model = model_from_json(model_json)
    model.load_weights(weights_filename)
    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    model.compile(**comp_params)

    # Wrap the Keras model for ART; inputs are normalised into [0, 1] below.
    classifier = KerasClassifier((0, 1), model)

    # Load the test split from the .npz dataset file.
    pf = np.load(dataset_filename)

    x = pf['x_test']
    y = pf['y_test']

    # Pre-process: add a channel axis and scale pixel values to [0, 1].
    x = np.expand_dims(x, axis=3)
    x = x.astype('float32') / 255

    y = np_utils.to_categorical(y, 10)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics = get_metrics(model, x, x_samples, y)

    print("metrics : ", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")

    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], 'adv_samples')
    print("adversarial samples saved to : ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
Ejemplo n.º 22
0
              optimizer='adam',
              metrics=['accuracy'])

classifier = KerasClassifier((min_, max_), model=model)
classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the classifier on the test set
preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy: %.2f%%" % (acc * 100))

# Craft adversarial samples

################ FGSM;
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier)
x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon)
# Evaluate the classifier on the adversarial examples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
# grab a particular example to play wit
a = (preds != np.argmax(y_test, axis=1))
nat_img = x_test[a]
adv_img = x_test_adv[a]
adv_nse = adv_img - nat_img
adv_prd = preds[a]
# compute variance and plot (some) example(s)
adv_var = np.sqrt(np.var(adv_nse) / np.var(nat_img))
adv_plt = np.column_stack([
    nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28),
    # predictions = mnist_classifier.predict(x_test_adv)
    # accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    # print('Accuracy after PGD-20 attack: {}%'.format(accuracy * 100))

    # PGD-100
    adv_crafter_pgd_100 = ProjectedGradientDescent(mnist_classifier, max_iter=100, batch_size=batch_size)

    x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array)

    # Test the classifier on adversarial exmaples
    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100))

    # FGSM
    adv_crafter_fgsm = FastGradientMethod(mnist_classifier, eps=epsilon, batch_size=batch_size)
    x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

    # Test the classifier on adversarial exmaples
    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

    # DeepFool
    adv_crafter_deepfool = CarliniLInfMethod(mnist_classifier, batch_size=batch_size)
    x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)

    predictions = mnist_classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
    print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100))
Ejemplo n.º 24
0
def run_ssc(test_object, outs):
    """SSC (sign-sign coverage) guided test generation over a DNN.

    Counts the total number of decision features of the network, then loops:
    pick the next uncovered decision neuron, search for an input pair that
    exercises it (``ssc_search``, aided by a global FGSM attacker), track
    condition coverage, save any adversarial pairs found, and append one
    stats line per search to the results file.

    NOTE(review): this function loops forever (``while True``) and terminates
    the whole process with ``sys.exit(0)`` once every decision feature of the
    current layer is covered — it never returns normally.

    :param test_object: project object bundling the DNN under test plus
        layer/feature selections and attack parameters.
    :param outs: output location forwarded to ``ssc_setup``.
    """
    print('To run ssc\n')

    # ssc_setup presumably builds the results path, per-layer evaluation
    # functions and coverage bookkeeping objects -- TODO confirm.
    f_results, layer_functions, cover_layers, _ = ssc_setup(test_object, outs)
    d_advs = []  # L-infinity distances of adversarial pairs found so far
    # Append the header describing the columns of each stats line below.
    f = open(f_results, "a")
    f.write(
        '#ssc runs;  #test cases;  #adversarial examples;  is feasible; is top-1 adversarial example; is top-x adversarial example; condition feature size; L infinity distance; L0 distance; decision layer index; dec feature; #condition layer neurons; new labels; original labels; coverage; local coverage\n'
    )
    f.close()

    # ------------------------------------------------------------------
    # Count the total number of decision features (denominator of the
    # overall coverage figure).  Dense layers (and conv layers that change
    # the spatial shape) contribute every neuron; shape-preserving conv
    # layers contribute only positions where the kernel fully fits.
    # ------------------------------------------------------------------
    tot_decs = 0
    if test_object.layer_indices == None:  # NOTE(review): `is None` preferred
        # No explicit layer selection: consider every cover layer.
        for i in range(1, len(cover_layers)):
            if i == 1 and is_input_layer(test_object.dnn.layers[0]): continue
            else:
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(
                        cover_layers[i].layer) or not (csp[1] == dsp[1]
                                                       and csp[2] == dsp[2]):
                    tot_decs += cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    sp = cover_layers[i].ssc_map.shape
                    tot_decs += ((sp[1] - ks[0] + 1) * (sp[2] - ks[1] + 1) *
                                 sp[3])
    else:
        # Restrict the count to the user-selected layers (and, for conv
        # layers, to the selected feature maps).
        print(test_object.layer_indices, test_object.feature_indices)
        for i in range(1, len(cover_layers)):
            if cover_layers[i].layer_index in test_object.layer_indices:
                print('****', i)
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(
                        cover_layers[i].layer) or not (csp[1] == dsp[1]
                                                       and csp[2] == dsp[2]):
                    tmp_decs = cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    dsp = cover_layers[i].ssc_map.shape
                    tmp_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) *
                                dsp[3])
                if is_conv_layer(cover_layers[i].layer):
                    if not test_object.feature_indices == None:
                        print('**', tmp_decs)
                        # Scale by the fraction of selected feature maps.
                        tmp_decs = tmp_decs * (
                            len(test_object.feature_indices) * 1.0 / dsp[3])
                        print('**', tmp_decs)
                tot_decs += tmp_decs
    print('tot_decs', tot_decs)
    tot_coverage = 0.0

    ## Define a global attacker: FGSM on the model under test, used by
    ## ssc_search to propose perturbed inputs.
    classifier = KerasClassifier((MIN, -MIN), model=test_object.dnn)
    adv_crafter = FastGradientMethod(classifier)

    test_cases = []     # (new_image, old_image) pairs found feasible
    adversarials = []   # subset whose top-x prediction changed
    count = 0           # number of ssc_search invocations so far

    while True:
        # Pick the next decision position and mark it as visited.
        dec_layer_index, dec_pos = get_ssc_next(cover_layers,
                                                test_object.layer_indices,
                                                test_object.feature_indices)
        cover_layers[dec_layer_index].ssc_map.itemset(dec_pos, False)

        if dec_layer_index == 1 and is_input_layer(test_object.dnn.layers[0]):
            continue

        ### The condition layer is the one feeding the decision layer.
        cond_layer = cover_layers[dec_layer_index - 1]
        dec_layer = cover_layers[dec_layer_index]
        cond_cover = np.zeros(cond_layer.ssc_map.shape, dtype=bool)
        ###

        if is_padding(dec_pos, dec_layer, cond_layer):
            continue
        print('dec_layer_index', cover_layers[dec_layer_index].layer_index)

        tot_conds = cond_cover.size
        if is_conv_layer(cond_layer.layer):
            csp = cond_layer.layer.input.shape
            dsp = cond_layer.ssc_map.shape
            if (csp[1] == dsp[1] and csp[2] == dsp[2]):
                ks = cond_layer.layer.kernel_size
                dsp = cond_layer.ssc_map.shape
                # NOTE(review): this overwrites the global `tot_decs`
                # computed above with a per-layer value; the coverage
                # denominator changes from here on -- confirm intended.
                tot_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) *
                            dsp[3])

        # Keep searching this decision until local coverage saturates or 10
        # consecutive searches add no newly covered conditions.
        non_increasing = 0
        step_coverage = 0
        while not (step_coverage >= 1.0 or non_increasing >= 10):
            count += 1

            d_min, d_norm, new_image, old_image, old_labels, cond_diff_map = ssc_search(
                test_object, layer_functions, cond_layer, None, dec_layer,
                dec_pos, adv_crafter)

            print('d_min is', d_min, 'd_norm is', d_norm)

            # Feasible when few enough condition neurons changed (or exactly
            # one did).
            feasible = (
                d_min <= test_object.cond_ratio * cond_layer.ssc_map.size
                or d_min == 1)

            top1_adv_flag = False
            top5_adv_flag = False
            y1s = []
            y2s = []
            y1_flag = False
            y2_flag = False
            labels = test_object.labels

            l0_d = None
            top_classes = test_object.top_classes
            inp_ub = test_object.inp_ub

            found_new = True
            if feasible:
                # Merge the newly covered conditions and update the local
                # coverage ratio for this decision.
                cond_cover = np.logical_or(cond_cover, cond_diff_map)
                covered = np.count_nonzero(cond_cover)
                new_step_coverage = covered * 1.0 / tot_conds
                if new_step_coverage == step_coverage:
                    non_increasing += 1
                    found_new = False
                else:
                    non_increasing = 0
                step_coverage = new_step_coverage

            if feasible and found_new:

                test_cases.append((new_image, old_image))
                # Convert the pair to uint8 for the L0 distance and for
                # saving; rescale first when inputs are not in [0, 255].
                if inp_ub == 255:
                    new_image = new_image.astype('uint8')
                    old_image = old_image.astype('uint8')
                    diff_image = np.abs(new_image - old_image)
                else:
                    new_image_ = new_image * 255.0 / inp_ub
                    old_image_ = old_image * 255.0 / inp_ub
                    new_image_ = new_image_.astype('uint8')
                    old_image_ = old_image_.astype('uint8')
                    diff_image = np.abs(new_image_ - old_image_)
                l0_d = np.count_nonzero(diff_image) / (new_image.size * 1.0)
                # Top-`top_classes` predicted labels for both images.
                y1s = (np.argsort(
                    test_object.dnn.predict(np.array([new_image
                                                      ]))))[0][-top_classes:]
                y2s = (np.argsort(
                    test_object.dnn.predict(np.array([old_image
                                                      ]))))[0][-top_classes:]

                if y1s[top_classes - 1] != y2s[top_classes - 1]:
                    top1_adv_flag = True

                if labels == None: labels = old_labels
                for label in labels:
                    if label in y1s: y1_flag = True
                    if label in y2s: y2_flag = True

                # Adversarial in the top-x sense: a label of interest moved
                # into or out of the top predictions of exactly one image.
                if y1_flag != y2_flag: top5_adv_flag = True

                if top5_adv_flag:
                    print('found an adversarial example')
                    adversarials.append((new_image, old_image))
                    save_adversarial_examples([
                        new_image / (inp_ub * 1.0), '{0}-adv-{1}'.format(
                            len(adversarials), y1s[top_classes - 1])
                    ], [
                        old_image / (inp_ub * 1.0), '{0}-original-{1}'.format(
                            len(adversarials), y2s[top_classes - 1])
                    ], [
                        diff_image /
                        (255 * 1.0), '{0}-diff'.format(len(adversarials))
                    ],
                                              f_results.split('/')[0])
                    adv_flag = True  # NOTE(review): assigned but never read
                    d_advs.append(d_norm)
                    # Periodically dump the distance distribution.
                    if len(d_advs) % 100 == 0:
                        print_adversarial_distribution(
                            d_advs,
                            f_results.replace('.txt', '') +
                            '-adversarial-distribution.txt')
            else:
                print("not feasible")

            # Append one stats line per search (columns per the header).
            print('f_results: ', f_results)
            f = open(f_results, "a")
            f.write(
                '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11} {12} {13} {14} {15}\n'
                .format(count, len(test_cases), len(adversarials), feasible,
                        top1_adv_flag, top5_adv_flag, d_min, d_norm, l0_d,
                        dec_layer.layer_index, dec_pos,
                        cond_layer.ssc_map.size, y1s, y2s,
                        tot_coverage + step_coverage / tot_decs,
                        step_coverage))
            f.close()
            #######
            if not feasible: break
            #######
        tot_coverage += step_coverage / tot_decs
        ## todo: this is a shortcut
        if not np.any(cover_layers[dec_layer_index].ssc_map):
            print(
                'all decision features at layer {0} have been covered'.format(
                    dec_layer.layer_index))
            sys.exit(0)
Ejemplo n.º 25
0
def mcdc(x,
         dnn,
         aveImg_binary,
         mcdc_cond_ratio=0.2,
         max_v=255,
         lb_v=-125.5,
         ub_v=125.5,
         opt=True,
         num=None,
         tot_iters=1000):
    """Search for adversarial variants of *x* via MC/DC-style SSC flipping.

    Repeatedly (up to *tot_iters* attempts) picks a decision neuron, runs
    ``ssc_search`` with an FGSM helper attacker, and keeps any perturbed
    image whose predicted class differs from the prediction on *x*.

    :param x: a single input sample (e.g. one image).
    :param dnn: the Keras model under test.
    :param aveImg_binary: unused in this body -- presumably kept for API
        compatibility; TODO confirm against callers.
    :param mcdc_cond_ratio: feasibility threshold on the fraction of changed
        condition-layer neurons.
    :param max_v: upper bound of the raw input value range.
    :param lb_v: lower perturbation bound handed to the attack object.
    :param ub_v: upper perturbation bound handed to the attack object.
    :param opt: when True, shrink each adversarial change to the smallest
        row prefix that still flips the prediction.
    :param num: when None, return the first adversarial image found;
        otherwise collect up to *num* images before returning.
    :param tot_iters: maximum number of search attempts.
    :return: ``(True, image_or_array)`` on success; ``(False, None)`` or
        ``(False, array_of_partial_results)`` otherwise.
    """
    x_test = np.array([x])
    raw_data = raw_datat(x_test, None)
    test_object = test_objectt(dnn, raw_data, 'ssc', 'linf')
    test_object.cond_ratio = mcdc_cond_ratio
    adv_object = adv_objectt(max_v, lb_v, ub_v)
    # Reference prediction on the unmodified input; a candidate only counts
    # as adversarial if its predicted class differs from `res`.
    predictResults = dnn.predict(np.array([x]), verbose=1)
    res = np.argmax(predictResults)
    f_results, layer_functions, cover_layers, _ = ssc_setup(
        test_object, '/tmp/')

    d_advs = []
    # Append the stats header to the results log.
    f = open(f_results, "a")
    f.write(
        '#ssc runs;  #test cases;  #adversarial examples;  is feasible; is top-1 adversarial example; is top-x adversarial example; condition feature size; L infinity distance; L0 distance; decision layer index; dec feature; #condition layer neurons; new labels; original labels; coverage; local coverage\n'
    )
    f.close()

    # Only allocate the collection buffer when the caller wants several
    # adversarial images back.
    if not (num is None):
        new_images = []

    # Count the total number of decision features (same computation as in
    # run_ssc): dense / shape-changing conv layers contribute every neuron,
    # shape-preserving conv layers only the positions where the kernel fits.
    tot_decs = 0
    if test_object.layer_indices == None:  # NOTE(review): `is None` preferred
        for i in range(1, len(cover_layers)):
            if i == 1 and is_input_layer(test_object.dnn.layers[0]): continue
            else:
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(
                        cover_layers[i].layer) or not (csp[1] == dsp[1]
                                                       and csp[2] == dsp[2]):
                    tot_decs += cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    sp = cover_layers[i].ssc_map.shape
                    tot_decs += ((sp[1] - ks[0] + 1) * (sp[2] - ks[1] + 1) *
                                 sp[3])
    else:
        # Restrict to the user-selected layers / feature maps.
        print(test_object.layer_indices, test_object.feature_indices)
        for i in range(1, len(cover_layers)):
            if cover_layers[i].layer_index in test_object.layer_indices:
                print('****', i)
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(
                        cover_layers[i].layer) or not (csp[1] == dsp[1]
                                                       and csp[2] == dsp[2]):
                    tmp_decs = cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    dsp = cover_layers[i].ssc_map.shape
                    tmp_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) *
                                dsp[3])
                if is_conv_layer(cover_layers[i].layer):
                    if not test_object.feature_indices == None:
                        tmp_decs = tmp_decs * (
                            len(test_object.feature_indices) * 1.0 / dsp[3])
                tot_decs += tmp_decs
    tot_coverage = 0.0

    ## define a global attacker
    #classifier=KerasClassifier((MIN, -MIN), model=test_object.dnn)
    classifier = KerasClassifier(test_object.dnn)
    adv_crafter = FastGradientMethod(classifier)

    test_cases = []
    adversarials = []
    count = 0

    # Bounded search loop (unlike run_ssc, which loops forever).
    while count < tot_iters:
        dec_layer_index, dec_pos = get_ssc_next(cover_layers)
        cover_layers[dec_layer_index].ssc_map.itemset(dec_pos, False)
        if dec_layer_index == 1 and is_input_layer(test_object.dnn.layers[0]):
            continue
        #print (dec_layer_index, dec_pos)
        ### Condition layer feeds the decision layer.
        cond_layer = cover_layers[dec_layer_index - 1]
        dec_layer = cover_layers[dec_layer_index]
        cond_cover = np.zeros(cond_layer.ssc_map.shape, dtype=bool)
        ###

        if is_padding(dec_pos, dec_layer, cond_layer, False):
            continue

        tot_conds = cond_cover.size
        if is_conv_layer(cond_layer.layer):
            csp = cond_layer.layer.input.shape
            dsp = cond_layer.ssc_map.shape
            if (csp[1] == dsp[1] and csp[2] == dsp[2]):
                ks = cond_layer.layer.kernel_size
                dsp = cond_layer.ssc_map.shape
                # NOTE(review): overwrites the aggregate `tot_decs` computed
                # above -- confirm intended.
                tot_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) *
                            dsp[3])

        non_increasing = 0
        step_coverage = 0
        count += 1
        d_min, d_norm, new_image, old_image, old_labels, cond_diff_map = ssc_search(
            test_object, layer_functions, cond_layer, None, dec_layer, dec_pos,
            adv_crafter, adv_object)
        #print ('d_min is', d_min, 'd_norm is', d_norm)
        # Feasible when few enough condition neurons changed (or exactly one).
        cond_ratio = test_object.cond_ratio
        feasible = (d_min <= cond_ratio * cond_layer.ssc_map.size
                    or d_min == 1)
        if feasible:
            new_predictResults = dnn.predict(np.array([new_image]), verbose=1)
            new_res = np.argmax(new_predictResults)
            #print ('####', res, new_res, x.shape)
            # Only keep candidates whose predicted class actually flipped.
            if res == new_res: continue

            ## To optimise the adversarial example: find the smallest prefix
            ## of rows copied from `new_image` into `x` that still flips the
            ## prediction, and keep that minimal variant instead.
            if opt:
                for i in range(0, len(x)):
                    simple_x = x.copy()
                    for ii in range(0, i + 1):
                        simple_x[ii] = new_image[ii]
                    simple_predictResults = dnn.predict(np.array([simple_x]),
                                                        batch_size=5000,
                                                        verbose=1)
                    simple_res = np.argmax(simple_predictResults)
                    if simple_res == res: continue
                    #for ii in range(0, i+1):
                    #  plt.imshow(simple_x[ii],cmap='gray')
                    #  #plt.imsave('new_{0}.png'.format(ii),simple_x[ii],cmap='gray')
                    #  plt.show()
                    #  plt.imshow(x[ii],cmap='gray')
                    #  plt.show()
                    ##  #plt.imsave('origin_{0}.png'.format(ii),x[ii],cmap='gray')
                    if num is None: return True, simple_x
                    else: new_images.append(simple_x)
            else:
                if num is None: return True, new_image
                else: new_images.append(new_image)
        # Stop early once the requested number of images is collected.
        if not (num is None):
            if len(new_images) >= num: return True, np.array(new_images)

    # Search budget exhausted.
    if (num is None): return False, None
    else: return False, np.array(new_images)
Ejemplo n.º 26
0
    def attack(self, model=None, attack_str=""):
        """Return adversarial examples for the configured attack, using a
        cached copy from disk when one exists.

        When no cache is found, the surrogate model (or *model*, if given) is
        wrapped in an ART ``KerasClassifier`` and attacked with FGSM, CW, BIM
        or DeepFool; the crafted images are pickled for reuse and returned.

        :param model: optional trained Keras model; when ``None`` the
            surrogate model is trained on the training split first.
        :param attack_str: attack identifier forwarded to the cache lookup.
        :return: numpy array of adversarial images.
        :raises ValueError: if the configured attack name is not recognised.
        """
        imgs = self._load_images(attack_str, self._test_or_val_dataset)

        # Select the evaluation split to perturb.  (The original also
        # fetched the split's labels into `Y` but never used them.)
        if self._test_or_val_dataset == "_x_test_set_":
            X = self.__data.x_test
        else:
            X = self.__data.x_val

        # `imgs` may be a numpy array, so identity-compare against None
        # instead of the original `type(imgs) != type(None)` (truth-testing
        # an array raises ValueError, and `is not None` is the idiom).
        if imgs is not None:
            print('\n{0} adversarial examples using {1} attack loaded...\n'.
                  format(self.__dataset, self.__attack))
            return imgs

        # No supplied model: train the surrogate first and attack that.
        if model is None:
            model = self.surrogate_model.fit(self.__data.x_train,
                                             self.__data.y_train,
                                             verbose=1,
                                             epochs=self.__epochs,
                                             batch_size=128)
            wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
        else:
            wrap = KerasClassifier((0., 1.), model=model)

        if self.__attack == 'FGSM':
            print('\nCrafting adversarial examples using FGSM attack...\n')
            fgsm = FastGradientMethod(wrap)

            # MNIST tolerates a larger perturbation budget than CIFAR.
            if self.__data.dataset_name == 'MNIST':
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.2)
            else:
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.025)

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "fgsm.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack.startswith("CW"):
            print('\nCrafting adversarial examples using CW attack...\n')
            cw = CarliniL2Method(wrap,
                                 confidence=0.0,
                                 targeted=False,
                                 binary_search_steps=1,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 max_iter=100)
            x_adv_images = cw.generate(X[self.idx_adv][:self._length])

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'BIM':
            print('\nCrafting adversarial examples using BIM attack...\n')

            # NOTE(review): `bim` is only bound for MNIST/CIFAR; any other
            # dataset name still raises NameError below -- confirm intended.
            if self.__dataset == 'MNIST':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.25,
                                           eps_step=0.2,
                                           max_iter=100,
                                           norm=np.inf)
            if self.__dataset == 'CIFAR':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.025,
                                           eps_step=0.01,
                                           max_iter=1000,
                                           norm=np.inf)

            x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'DEEPFOOL':
            print('\nCrafting adversarial examples using DeepFool attack...\n')

            deepfool = DeepFool(wrap)
            x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "deepfool.pkl")
            helpers.save_pkl(x_adv_images, path)

        else:
            # Fail loudly instead of the original's NameError on the
            # undefined `x_adv_images`.
            raise ValueError("Unknown attack: " + str(self.__attack))

        return x_adv_images
Ejemplo n.º 27
0
        wfile.write("Model used for crafting the adversarial examples is in " +
                    MODEL_PATH)

    v_print("Adversarials crafted with", args.adv_method, "on", MODEL_PATH,
            "will be saved in", SAVE_ADV)

if args.adv_method in ['fgsm', "vat", "rnd_fgsm"]:

    eps_ranges = {
        'fgsm': [e / 10 for e in range(1, 11)],
        'rnd_fgsm': [e / 10 for e in range(1, 11)],
        'vat': [1.5, 2.1, 5, 7, 10]
    }

    if args.adv_method in ["fgsm", "rnd_fgsm"]:
        adv_crafter = FastGradientMethod(classifier, sess=session)
    else:
        adv_crafter = VirtualAdversarialMethod(classifier, sess=session)

    for eps in eps_ranges[args.adv_method]:

        if args.adv_method == "rnd_fgsm":
            x_train = np.clip(
                X_train + alpha * np.sign(np.random.randn(*X_train.shape)),
                min_, max_)
            x_test = np.clip(
                X_test + alpha * np.sign(np.random.randn(*X_test.shape)), min_,
                max_)
            e = eps - alpha
        else:
            x_train = X_train
Ejemplo n.º 28
0
def run_svc(test_object, outs):
    """SVC (sign-value coverage) guided test generation over a DNN.

    Loops indefinitely: pick the next decision neuron, choose a random
    condition neuron, search for an input pair exercising the pair
    (``svc_search``, aided by a global FGSM attacker), save any adversarial
    pairs, and append one stats line per search to the results file.  No
    normal return is visible in this body.

    NOTE(review): `clayers` (used in two prints below) is not defined in
    this scope -- most likely `cover_layers` was intended; those lines raise
    NameError when reached.

    :param test_object: project object bundling the DNN under test plus
        attack/selection parameters.
    :param outs: output location forwarded to ``ssc_setup``.
    """
    print('To run svc\n')

    f_results, layer_functions, cover_layers, activations = ssc_setup(
        test_object, outs)

    ## Define a global FGSM attacker on the model under test.
    classifier = KerasClassifier((MIN, -MIN), model=test_object.dnn)
    adv_crafter = FastGradientMethod(classifier)

    test_cases = []     # (new_image, old_image) pairs found feasible
    adversarials = []   # subset whose top-x prediction changed

    count = 0  # number of svc_search invocations so far

    while True:
        dec_layer_index, dec_pos = get_ssc_next(cover_layers)

        if dec_layer_index == 1 and is_input_layer(test_object.dnn.layers[0]):
            continue
        # NOTE(review): `clayers` is undefined here (likely `cover_layers`).
        print('dec_layer_index', clayers[dec_layer_index].layer_index)

        ### The condition layer is the one feeding the decision layer.
        cond_layer = cover_layers[dec_layer_index - 1]
        dec_layer = cover_layers[dec_layer_index]
        cond_cover = np.ones(cond_layer.ssc_map.shape, dtype=bool)
        ###

        ## To check if dec_pos is a padding position: probe the receptive
        ## field of the decision neuron and skip it on an index error.
        dec_pos_unravel = None
        osp = dec_layer.ssc_map.shape
        dec_pos_unravel = np.unravel_index(dec_pos, osp)
        if is_conv_layer(dec_layer.layer):
            Weights = dec_layer.layer.get_weights()
            weights = Weights[0]
            biases = Weights[1]
            I = 0
            J = dec_pos_unravel[1]
            K = dec_pos_unravel[2]
            L = dec_pos_unravel[3]
            kernel_size = dec_layer.layer.kernel_size
            try:
                for II in range(0, kernel_size[0]):
                    for JJ in range(0, kernel_size[1]):
                        for KK in range(0, weights.shape[2]):
                            try_tmp = cond_layer.ssc_map[0][J + II][K + JJ][KK]
            except:
                #print ('dec neuron is a padding')
                continue

        # Pick a random condition neuron for this decision.
        cond_pos = np.random.randint(0, cond_cover.size)

        print('cond, dec layer index: ', cond_layer.layer_index,
              dec_layer.layer_index)
        # NOTE(review): `clayers` is undefined here as well.
        print('dec_layer_index: ', clayers[dec_layer_index].layer_index)

        count += 1

        # Upper bound for the decision neuron's value, slightly padded.
        dec_ub = dec_layer.ubs.item(dec_pos) + 0.001
        #for act in activations[dec_layer.layer_index]:
        #  v=act.item(dec_pos)
        #  if v>dec_ub: dec_ub=v

        print('dec_ub: ', dec_ub)

        d_min, d_norm, new_image, old_image = svc_search(
            test_object, layer_functions, cond_layer, cond_pos, dec_layer,
            dec_pos, adv_crafter, dec_ub)

        print('d_min is', d_min, 'd_norm is', d_norm)

        # Feasible when few enough condition neurons changed (or exactly one).
        feasible = (d_min <= test_object.cond_ratio * cond_layer.ssc_map.size
                    or d_min == 1)

        top1_adv_flag = False
        top5_adv_flag = False
        top5b_adv_flag = False
        y1s = []
        y2s = []
        y1_flag = False
        y2_flag = False
        labels = test_object.labels  #[555, 920]

        l0_d = None
        top_classes = test_object.top_classes
        inp_ub = test_object.inp_ub

        if feasible:
            test_cases.append((new_image, old_image))
            # Convert to uint8 for distance/saving when inputs are 0..255.
            if inp_ub == 255:
                new_image = new_image.astype('uint8')
                old_image = old_image.astype('uint8')
            diff_image = np.abs(new_image - old_image)
            l0_d = np.count_nonzero(diff_image) / (new_image.size * 1.0)
            # Top-`top_classes` predicted labels for both images.
            y1s = (np.argsort(test_object.dnn.predict(np.array(
                [new_image]))))[0][-top_classes:]
            y2s = (np.argsort(test_object.dnn.predict(np.array(
                [old_image]))))[0][-top_classes:]

            if y1s[top_classes - 1] != y2s[top_classes - 1]:
                top1_adv_flag = True

            # Top-x variant b: the new top prediction left the old top set.
            if not y1s[top_classes - 1] in y2s: top5b_adv_flag = True

            for label in labels:
                if label in y1s: y1_flag = True
                if label in y2s: y2_flag = True

            # Adversarial in the top-x sense: a label of interest moved into
            # or out of the top predictions of exactly one image.
            if y1_flag != y2_flag: top5_adv_flag = True

            if top5_adv_flag:
                print('found an adversarial example')
                adversarials.append((new_image, old_image))
                save_an_image(
                    new_image / (inp_ub * 1.0),
                    '{0}-adv-{1}.png'.format(len(adversarials),
                                             y1s[top_classes - 1]),
                    f_results.split('/')[0])
                save_an_image(
                    old_image / (inp_ub * 1.0),
                    '{0}-original-{1}.png'.format(len(adversarials),
                                                  y2s[top_classes - 1]),
                    f_results.split('/')[0])
                save_an_image(diff_image / (inp_ub * 1.0),
                              '{0}-diff.png'.format(len(adversarials)),
                              f_results.split('/')[0])
                adv_flag = True  # NOTE(review): assigned but never read
        else:
            print("not feasible")

        # Append one stats line per search.
        print('f_results: ', f_results)
        f = open(f_results, "a")
        f.write(
            '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11} {12} {13}\n'.
            format(count, len(test_cases), len(adversarials), feasible,
                   top1_adv_flag, top5_adv_flag, top5b_adv_flag, d_min, d_norm,
                   l0_d, dec_layer.layer_index, cond_layer.ssc_map.size, y1s,
                   y2s))
        f.close()
           input_shape=x_train.shape[1:]))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the classifier on the test set
preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy: %.2f%%" % (acc * 100))

# Craft adversarial samples with FGSM
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier, eps=epsilon)
x_test_adv = adv_crafter.generate(x=x_test)

# Evaluate the classifier on the adversarial examples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
Ejemplo n.º 30
0
def robustness_check(object_storage_url,
                     object_storage_username,
                     object_storage_password,
                     data_bucket_name,
                     result_bucket_name,
                     model_id,
                     feature_testset_path='processed_data/X_test.npy',
                     label_testset_path='processed_data/y_test.npy',
                     clip_values=(0, 1),
                     nb_classes=2,
                     input_shape=(1, 3, 64, 64),
                     model_class_file='model.py',
                     model_class_name='model',
                     LossFn='',
                     Optimizer='',
                     epsilon=0.2):
    """Evaluate the adversarial robustness of a stored PyTorch model.

    Downloads the test set, trained weights, and submitted model source code
    from object storage, reconstructs the model, crafts FGSM adversarial
    samples against it, and returns the metrics computed by ``get_metrics``.

    :param object_storage_url: endpoint URL of the object storage service
        (an ``http://`` / ``https://`` scheme prefix is stripped before use)
    :param object_storage_username: access key for the object storage
    :param object_storage_password: secret key for the object storage
    :param data_bucket_name: bucket holding the test feature/label files
    :param result_bucket_name: bucket holding the trained model artifacts
    :param model_id: id prefix under which the model artifacts are stored
    :param feature_testset_path: object path of the ``X_test.npy`` features
    :param label_testset_path: object path of the ``y_test.npy`` labels
    :param clip_values: (min, max) valid input range for the classifier
    :param nb_classes: number of output classes of the model
    :param input_shape: shape of a single model input sample
    :param model_class_file: filename inside model.zip that defines the model
    :param model_class_name: name of the model class in that file
    :param LossFn: optional Python expression string for the loss function
    :param Optimizer: optional Python expression string for the optimizer
    :param epsilon: maximum FGSM perturbation
    :return: the metrics object returned by ``get_metrics``
    """
    # Strip the scheme: Minio expects a bare host[:port] endpoint.
    url = re.compile(r"https?://")
    cos = Minio(url.sub('', object_storage_url),
                access_key=object_storage_username,
                secret_key=object_storage_password)

    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + '/_submitted_code/model.zip'

    # Fetch test data, trained weights, and the submitted model source.
    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + '/' + weights_filename,
                    weights_filename)
    cos.fget_object(result_bucket_name, model_files, 'model.zip')

    # Load PyTorch model definition from the source code. A context manager
    # guarantees the archive handle is closed even if extraction fails
    # (the previous open/extractall/close leaked the handle on error).
    with zipfile.ZipFile('model.zip', 'r') as zip_ref:
        zip_ref.extractall('model_files')

    modulename = 'model_files.' + model_class_file.split('.')[0].replace(
        '-', '_')
    '''
    We required users to define where the model class is located or follow
    some naming convention we have provided.
    '''
    model_class = getattr(importlib.import_module(modulename),
                          model_class_name)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))

    # Define Loss and optimizer function for the PyTorch model.
    # SECURITY: eval() executes arbitrary code from the LossFn / Optimizer
    # strings — these must only ever come from trusted configuration, never
    # from untrusted user input.
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Wrap the model in an ART classifier so the attack can query it.
    classifier = PyTorchClassifier(clip_values, model, loss_fn, optimizer,
                                   input_shape, nb_classes)

    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)
    return metrics