def test_without_defences(self):
    """
    Run FGM against the undefended Keras MNIST classifier through the query-efficient gradient estimator.

    Checks that the attack modifies both data splits and that the predicted labels on the adversarial samples
    are not all equal to the true labels, then logs the adversarial accuracy for each split.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Wrap the ready-trained Keras model in the query-efficient gradient estimator
    estimator = QueryEfficientBBGradientEstimation(self.classifier_k, 20, 1 / 64., round_samples=1 / 255.)
    fgm = FastGradientMethod(estimator, eps=1)

    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # The attack must have changed at least one value in each split
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    labels_train = get_labels_np_array(estimator.predict(adv_train))
    labels_test = get_labels_np_array(estimator.predict(adv_test))
    self.assertFalse((y_train == labels_train).all())
    self.assertFalse((y_test == labels_test).all())

    # Accuracy = share of samples whose arg-max prediction equals the arg-max of the label row
    train_hits = np.argmax(estimator.predict(adv_train), axis=1) == np.argmax(y_train, axis=1)
    acc = np.sum(train_hits) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples with limited query info: %.2f%%', (acc * 100))

    test_hits = np.argmax(estimator.predict(adv_test), axis=1) == np.argmax(y_test, axis=1)
    acc = np.sum(test_hits) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples with limited query info: %.2f%%', (acc * 100))
def test_with_defences(self):
    """
    Run FGM through the query-efficient gradient estimator against a feature-squeezed Keras MNIST classifier.

    Checks that the attack modifies both data splits and that the predicted labels on the adversarial samples
    are not all equal to the true labels, then logs the adversarial accuracy for each split.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Re-wrap the ready-trained Keras model with a 1-bit feature squeezing defence
    keras_model = self.classifier_k._model
    squeezer = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    defended = KerasClassifier(model=keras_model, clip_values=(0, 1), defences=squeezer)

    # Put the gradient estimator on top of the defended classifier
    estimator = QueryEfficientBBGradientEstimation(defended, 20, 1 / 64., round_samples=1 / 255.)
    fgm = FastGradientMethod(estimator, eps=1)

    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # The attack must have changed at least one value in each split
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    labels_train = get_labels_np_array(estimator.predict(adv_train))
    labels_test = get_labels_np_array(estimator.predict(adv_test))
    self.assertFalse((y_train == labels_train).all())
    self.assertFalse((y_test == labels_test).all())

    train_hits = np.argmax(estimator.predict(adv_train), axis=1) == np.argmax(y_train, axis=1)
    acc = np.sum(train_hits) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%',
                (acc * 100))

    test_hits = np.argmax(estimator.predict(adv_test), axis=1) == np.argmax(y_test, axis=1)
    acc = np.sum(test_hits) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%',
                (acc * 100))
def test_iris_pt(self):
    """
    Run untargeted and targeted FGM against the PyTorch Iris classifier.

    Both runs must change the inputs and keep every adversarial value inside [0, 1]. The untargeted run must
    flip at least one prediction away from the true class; the targeted run must hit at least one target.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()
    true_classes = np.argmax(y_test, axis=1)

    # Untargeted attack
    fgm = FastGradientMethod(classifier, eps=.1)
    adv = fgm.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    adv_classes = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((true_classes == adv_classes).all())
    acc = np.sum(adv_classes == true_classes) / y_test.shape[0]
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (acc * 100))

    # Targeted attack towards randomly drawn target labels
    targets = random_targets(y_test, nb_classes=3)
    fgm = FastGradientMethod(classifier, targeted=True, eps=.1, batch_size=128)
    adv = fgm.generate(x_test, y=targets)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    adv_classes = np.argmax(classifier.predict(adv), axis=1)
    target_classes = np.argmax(targets, axis=1)
    self.assertTrue((target_classes == adv_classes).any())
    acc = np.sum(adv_classes == target_classes) / y_test.shape[0]
    logger.info('Success rate of targeted FGM on Iris: %.2f%%', (acc * 100))
def fgsm(clf, x_train, x_test, epsilon=0.1):
    """
    Craft FGSM adversarial examples for the train and test sets.

    :param clf: Classifier handed to `FastGradientMethod`.
    :param x_train: Training inputs to perturb.
    :param x_test: Test inputs to perturb.
    :param epsilon: Maximum perturbation passed to the attack as `eps`.
    :return: Tuple `(x_train_adv, x_test_adv)`.
    """
    from art.attacks.fast_gradient import FastGradientMethod

    # Use the caller's epsilon; it used to be overwritten with a hard-coded 0.1 here.
    fgsm_adv_crafter = FastGradientMethod(clf, eps=epsilon)
    # The test set is attacked first, matching the original call order.
    x_test_fgsm_adv = fgsm_adv_crafter.generate(x=x_test)
    x_train_fgsm_adv = fgsm_adv_crafter.generate(x=x_train)
    return x_train_fgsm_adv, x_test_fgsm_adv
def _test_with_defences(self, custom_activation=False):
    """
    Run FGM against the Keras MNIST classifier re-wrapped with the 'featsqueeze1' defence.

    :param custom_activation: Forwarded unchanged to `KerasClassifier`.
    """
    # MNIST splits prepared by the test fixture
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Re-use the ready-trained Keras model under a defended wrapper
    keras_model = self.classifier_k._model
    defended = KerasClassifier((0, 1), keras_model, defences='featsqueeze1', custom_activation=custom_activation)

    fgm = FastGradientMethod(defended, eps=1)
    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # The attack must have changed at least one value in each split
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    labels_train = get_labels_np_array(defended.predict(adv_train))
    labels_test = get_labels_np_array(defended.predict(adv_test))
    self.assertFalse((y_train == labels_train).all())
    self.assertFalse((y_test == labels_test).all())

    train_hits = np.argmax(defended.predict(adv_train), axis=1) == np.argmax(y_train, axis=1)
    acc = np.sum(train_hits) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples with feature squeezing: %.2f%%', (acc * 100))

    test_hits = np.argmax(defended.predict(adv_test), axis=1) == np.argmax(y_test, axis=1)
    acc = np.sum(test_hits) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', (acc * 100))
def test_subsetscan_detector(self):
    """
    Exercise `SubsetScanningDetector.scan` on clean and FGM-perturbed MNIST samples.

    The third value returned by `scan` must be approximately 0.5 when clean data is scanned against itself
    and greater than 0.5 when adversarial data is involved.
    """
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Keras classifier
    classifier, _ = get_classifier_kr()

    # Adversarial counterparts of both splits
    fgm = FastGradientMethod(classifier, eps=0.5)
    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # Clean and adversarial training samples stacked into one array
    mixed_train = np.concatenate((x_train, adv_train), axis=0)

    # Clean training data is handed to the detector as its second constructor argument
    detector = SubsetScanningDetector(classifier, x_train, layer=1)

    # Clean vs. clean
    _, _, dpwr = detector.scan(x_test, x_test)
    self.assertAlmostEqual(dpwr, 0.5)

    # Clean vs. adversarial
    _, _, dpwr = detector.scan(x_test, adv_test)
    self.assertGreater(dpwr, 0.5)

    # Clean vs. the mixed set, with the two extra positional scan arguments 85 and 15
    _, _, dpwr = detector.scan(x_test, mixed_train, 85, 15)
    self.assertGreater(dpwr, 0.5)
def test_with_defences(self):
    """
    Run FGM against the Keras MNIST classifier re-wrapped with the 'featsqueeze1' defence.

    Works on the first NB_TRAIN / NB_TEST samples only and prints the adversarial accuracy for each split.
    """
    # MNIST splits prepared by the test fixture, truncated to the configured sizes
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Re-use the ready-trained Keras model under a defended wrapper
    keras_model = self.classifier_k._model
    defended = KerasClassifier((0, 1), keras_model, defences='featsqueeze1')

    fgm = FastGradientMethod(defended, eps=1)
    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # The attack must have changed at least one value in each split
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    labels_train = get_labels_np_array(defended.predict(adv_train))
    labels_test = get_labels_np_array(defended.predict(adv_test))
    self.assertFalse((y_train == labels_train).all())
    self.assertFalse((y_test == labels_test).all())

    train_hits = np.argmax(defended.predict(adv_train), axis=1) == np.argmax(y_train, axis=1)
    acc = np.sum(train_hits) / y_train.shape[0]
    print('\nAccuracy on adversarial train examples with feature squeezing: %.2f%%' % (acc * 100))

    test_hits = np.argmax(defended.predict(adv_test), axis=1) == np.argmax(y_test, axis=1)
    acc = np.sum(test_hits) / y_test.shape[0]
    print('\naccuracy on adversarial test examples: %.2f%%' % (acc * 100))
def test_with_preprocessing(self):
    """
    Train a CNN with the 'featsqueeze1' defence for one epoch on an MNIST subset and attack it with FGM.

    Checks that the attack modifies both splits and that the predicted labels on adversarial samples are not
    all equal to the true labels; prints clean and adversarial accuracy along the way.
    """
    session = tf.Session()
    k.set_session(session)

    compile_settings = {
        "loss": 'categorical_crossentropy',
        "optimizer": 'adam',
        "metrics": ['accuracy']
    }

    # MNIST subset
    batch_size, nb_train, nb_test = 100, 1000, 100
    (x_tr, y_tr), (x_te, y_te), _, _ = load_mnist()
    x_tr, y_tr = x_tr[:nb_train], y_tr[:nb_train]
    x_te, y_te = x_te[:nb_test], y_te[:nb_test]
    im_shape = x_tr[0].shape

    # Defended classifier
    classifier = CNN(im_shape, act="relu", defences=["featsqueeze1"])
    classifier.compile(compile_settings)
    classifier.fit(x_tr, y_tr, epochs=1, batch_size=batch_size, verbose=0)

    # Index 1 of the evaluate() result is what gets printed as accuracy
    train_scores = classifier.evaluate(x_tr, y_tr)
    print("\naccuracy on training set: %.2f%%" % (train_scores[1] * 100))
    test_scores = classifier.evaluate(x_te, y_te)
    print("\naccuracy on test set: %.2f%%" % (test_scores[1] * 100))

    attack_params = {
        "verbose": 0,
        "clip_min": 0.,
        "clip_max": 1.,
        "eps": 1.
    }
    fgm = FastGradientMethod(classifier, session)
    adv_tr = fgm.generate(x_tr, **attack_params)
    adv_te = fgm.generate(x_te, **attack_params)

    # The attack must have changed at least one value in each split
    self.assertFalse((x_tr == adv_tr).all())
    self.assertFalse((x_te == adv_te).all())

    labels_tr = get_labels_np_array(classifier.predict(adv_tr))
    labels_te = get_labels_np_array(classifier.predict(adv_te))
    self.assertFalse((y_tr == labels_tr).all())
    self.assertFalse((y_te == labels_te).all())

    adv_train_scores = classifier.evaluate(adv_tr, y_tr)
    print('\naccuracy on adversarial train examples: %.2f%%' % (adv_train_scores[1] * 100))
    adv_test_scores = classifier.evaluate(adv_te, y_te)
    print('\naccuracy on adversarial test examples: %.2f%%' % (adv_test_scores[1] * 100))
def test_binary_activation_detector(self):
    """
    Test the binary activation detector end-to-end.

    A small Keras model is trained on the layer-0 activations of the classifier to separate clean samples
    (label [1, 0]) from FGM adversarial samples (label [0, 1]). The test passes when at least one adversarial
    test sample is flagged and at least one clean test sample is not.

    :return:
    """
    # Get MNIST, truncated once to the configured sizes
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Keras classifier
    classifier, _ = get_classifier_kr()

    # Generate adversarial samples. The splits are already truncated above; the test split used to be
    # re-sliced with NB_TRAIN, which could make the adversarial test set smaller than x_test.
    attacker = FastGradientMethod(classifier, eps=0.1)
    x_train_adv = attacker.generate(x_train)
    x_test_adv = attacker.generate(x_test)

    # Compile training data for detector. Label counts follow the real sample counts so that samples and
    # labels always line up, even if fewer than NB_TRAIN samples are available.
    x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)
    y_train_detector = np.concatenate((np.array([[1, 0]] * x_train.shape[0]),
                                       np.array([[0, 1]] * x_train_adv.shape[0])), axis=0)

    # Create a simple CNN for the detector; its input shape is that of one layer-0 activation
    activation_shape = classifier.get_activations(x_test[:1], 0).shape[1:]
    number_outputs = 2

    model = Sequential()
    model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
    model.add(Flatten())
    model.add(Dense(number_outputs, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Create detector and train it.
    # Detector considers activations at layer=0:
    detector = BinaryActivationDetector(classifier=classifier,
                                        detector=KerasClassifier(model=model, clip_values=(0, 1),
                                                                 use_logits=False),
                                        layer=0)
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data:
    test_detection = np.argmax(detector.predict(x_test), axis=1)
    test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

    # Assert there is at least one true positive and negative
    nb_true_positives = len(np.where(test_adv_detection == 1)[0])
    nb_true_negatives = len(np.where(test_detection == 0)[0])
    logger.debug('Number of true positives detected: %i', nb_true_positives)
    logger.debug('Number of true negatives detected: %i', nb_true_negatives)
    self.assertGreater(nb_true_positives, 0)
    self.assertGreater(nb_true_negatives, 0)
def main(argv):
    """
    Craft FGSM adversarial samples for a PyTorch `ThreeLayerCNN` and write a robustness report.

    Recognised `--flag value` pairs: `--datax` and `--datay` (dataset files, relative to $DATA_DIR),
    `--weights` (state dict file, relative to $DATA_DIR) and `--epsilon` (float perturbation budget).
    The metrics are written as JSON to $RESULT_DIR/report.txt and the original/adversarial samples to
    $RESULT_DIR/adv_samples (via `np.savez`).

    :param argv: Full argument vector; index 0 is skipped and flags are read from index 1 onwards.
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    # Walk the flag/value pairs that are actually present. The loop used to be fixed at four pairs and
    # raised IndexError on a shorter argument list.
    options = {}
    for i in range(1, len(argv) - 1, 2):
        arg = str(argv[i])
        print(arg)
        options[arg] = str(argv[i + 1])

    # Fail early with a readable message instead of a NameError further down.
    missing = [flag for flag in ("--datax", "--datay", "--epsilon") if flag not in options]
    # weights_filename is a module global, so a value set before the call is still honoured.
    if "--weights" not in options and "weights_filename" not in globals():
        missing.append("--weights")
    if missing:
        sys.exit("Missing required argument(s): " + ", ".join(missing))

    dataset_filenamex = os.path.join(os.environ["DATA_DIR"], options["--datax"])
    dataset_filenamey = os.path.join(os.environ["DATA_DIR"], options["--datay"])
    if "--weights" in options:
        weights_filename = os.path.join(os.environ["DATA_DIR"], options["--weights"])
    epsilon = float(options["--epsilon"])

    print("dataset_x:", dataset_filenamex)
    print("dataset_y:", dataset_filenamey)
    print("weights:", weights_filename)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = ThreeLayerCNN().to(device)
    # map_location lets weights saved on a GPU load on a CPU-only host.
    model.load_state_dict(torch.load(weights_filename, map_location=device))
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # create pytorch classifier
    classifier = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 3, 64, 64), 2)

    # load data set
    x = np.load(dataset_filenamex)
    y = np.loadtxt(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics, _, _ = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")
    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], "adv_samples")
    print("adversarial samples saved to: ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
def _test_with_defences(self, custom_activation=False):
    """
    Run FGM against the Keras MNIST classifier re-wrapped with a 1-bit feature squeezing defence.

    :param custom_activation: Forwarded unchanged to `KerasClassifier`.
    """
    from art.defences import FeatureSqueezing

    # MNIST splits prepared by the test fixture
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Re-use the ready-trained Keras model under a defended wrapper
    keras_model = self.classifier_k._model
    squeezer = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    defended = KerasClassifier(model=keras_model, clip_values=(0, 1), defences=squeezer,
                               custom_activation=custom_activation)

    fgm = FastGradientMethod(defended, eps=1, batch_size=128)
    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # The attack must have changed at least one value in each split
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    labels_train = get_labels_np_array(defended.predict(adv_train))
    labels_test = get_labels_np_array(defended.predict(adv_test))
    self.assertFalse((y_train == labels_train).all())
    self.assertFalse((y_test == labels_test).all())

    train_hits = np.argmax(defended.predict(adv_train), axis=1) == np.argmax(y_train, axis=1)
    acc = np.sum(train_hits) / y_train.shape[0]
    logger.info('Accuracy on MNIST with FGM adversarial train examples with feature squeezing: %.2f%%',
                (acc * 100))

    test_hits = np.argmax(defended.predict(adv_test), axis=1) == np.argmax(y_test, axis=1)
    acc = np.sum(test_hits) / y_test.shape[0]
    logger.info('Accuracy on MNIST with FGM adversarial test examples: %.2f%%', (acc * 100))
def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the
    attack are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both generate the attack images on
    the same source classifier. The test case checks if accuracy on adversarial samples increases after
    adversarially training the model.

    :return: None
    """
    session = tf.Session()
    k.set_session(session)

    # Load MNIST
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train, x_test, y_test = x_train[:NB_TRAIN], y_train[:NB_TRAIN], x_test[:NB_TEST], y_test[:NB_TEST]
    im_shape = x_train[0].shape

    # Create and fit target classifier
    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    params = {'epochs': 5, 'batch_size': BATCH_SIZE}
    classifier_tgt = CNN(im_shape, dataset='mnist')
    classifier_tgt.compile(comp_params)
    classifier_tgt.fit(x_train, y_train, **params)

    # Create and fit source classifier. The target used to be fitted a second time here, which left the
    # source model, the one the attacks run on, untrained.
    classifier_src = CNN(im_shape, dataset='mnist')
    classifier_src.compile(comp_params)
    classifier_src.fit(x_train, y_train, **params)

    # Create FGSM and DeepFool attackers
    adv1 = FastGradientMethod(classifier_src, session)
    adv2 = DeepFool(classifier_src, session)
    x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))
    print(y_adv.shape)
    # NOTE(review): evaluate() may return [loss, accuracy] rather than a scalar; if so the comparison
    # below orders by loss first. Confirm against the CNN wrapper.
    acc = classifier_tgt.evaluate(x_adv, y_adv)

    # Perform adversarial training
    adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
    adv_trainer.fit(x_train, y_train, **params)

    # Evaluate that accuracy on adversarial sample has improved
    acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
    self.assertTrue(acc_adv_trained >= acc)
def test_iris_unbounded(self):
    """
    Run FGM through the query-efficient gradient estimator on an Iris classifier built without clip values.

    With eps=1 and no clipping, some adversarial values must leave the [0, 1] range on both sides, and not
    every prediction may still equal the true class.
    """
    (_, _), (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)
    estimator = QueryEfficientBBGradientEstimation(unclipped, 20, 1 / 64., round_samples=1 / 255.)

    fgm = FastGradientMethod(estimator, eps=1)
    adv = fgm.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv > 1).any())
    self.assertTrue((adv < 0).any())

    true_classes = np.argmax(y_test, axis=1)
    adv_classes = np.argmax(estimator.predict(adv), axis=1)
    self.assertFalse((true_classes == adv_classes).all())

    acc = np.sum(adv_classes == true_classes) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (acc * 100))
def test_iris_clipped(self):
    """
    Run untargeted FGM through the query-efficient gradient estimator on the Keras Iris classifier.

    Every adversarial value must stay inside [0, 1], the inputs must change, and not every prediction may
    still equal the true class.
    """
    (_, _), (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()
    estimator = QueryEfficientBBGradientEstimation(base_classifier, 20, 1 / 64., round_samples=1 / 255.)

    # Untargeted attack
    fgm = FastGradientMethod(estimator, eps=.1)
    adv = fgm.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    true_classes = np.argmax(y_test, axis=1)
    adv_classes = np.argmax(estimator.predict(adv), axis=1)
    self.assertFalse((true_classes == adv_classes).all())

    acc = np.sum(adv_classes == true_classes) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (acc * 100))
def _test_mnist_targeted(self, classifier):
    """
    Run a targeted, minimal-perturbation FGM attack towards each sample's second most likely class.

    :param classifier: Classifier under attack; its predictions are used to pick the targets.
    """
    # Only the MNIST test split is needed
    (_, _), (x_test, y_test) = self.mnist
    nb_samples = x_test.shape[0]

    attack = FastGradientMethod(classifier, eps=1.0, targeted=True)

    # One-hot targets: the class ranked second in the classifier's own prediction for each sample
    ranking = classifier.predict(x_test).argsort(axis=1)
    y_test_adv = np.zeros((nb_samples, 10))
    y_test_adv[np.arange(nb_samples), ranking[:, -2]] = 1.0

    x_test_adv = attack.generate(x_test, minimal=True, eps_step=0.01, eps=1.0, y=y_test_adv)
    self.assertFalse((x_test == x_test_adv).all())

    predicted = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertEqual(y_test_adv.shape, predicted.shape)

    # Count element-wise agreements between targets and predictions; require at least half the sample count
    agreements = (y_test_adv == predicted).sum()
    self.assertTrue(agreements >= nb_samples // 2)
def test_iris_k_unbounded(self):
    """
    Run FGM on a Keras Iris classifier built without clip values.

    With eps=1 and no clipping, some adversarial values must leave the [0, 1] range on both sides, and not
    every prediction may still equal the true class.
    """
    (_, _), (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    fgm = FastGradientMethod(unclipped, eps=1)
    adv = fgm.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv > 1).any())
    self.assertTrue((adv < 0).any())

    true_classes = np.argmax(y_test, axis=1)
    adv_classes = np.argmax(unclipped.predict(adv), axis=1)
    self.assertFalse((true_classes == adv_classes).all())

    acc = np.sum(adv_classes == true_classes) / y_test.shape[0]
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (acc * 100))
def test_shared_model_mnist(self):
    """
    Test the adversarial trainer with a single FGSM attacker.

    One CNN, trained on MNIST for 5 epochs, is both the source and the target of the attack. The test checks
    that the evaluation result on adversarial samples does not get worse after adversarial training.

    :return: None
    """
    session = tf.Session()
    k.set_session(session)

    # MNIST, truncated to the configured sizes
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
    im_shape = x_train[0].shape

    # Build and fit the classifier
    fit_kwargs = {'epochs': 5, 'batch_size': BATCH_SIZE}
    compile_settings = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    classifier = CNN(im_shape, dataset='mnist')
    classifier.compile(compile_settings)
    classifier.fit(x_train, y_train, **fit_kwargs)

    # FGSM attacker and the baseline evaluation on its samples
    attacker = FastGradientMethod(classifier, session)
    x_adv = attacker.generate(x_test)
    # NOTE(review): evaluate() may return [loss, accuracy] rather than a scalar; if so the comparison
    # below orders by loss first. Confirm against the CNN wrapper.
    acc = classifier.evaluate(x_adv, y_test)

    # Adversarial training
    adv_trainer = AdversarialTrainer(classifier, attacker)
    adv_trainer.fit(x_train, y_train, **fit_kwargs)

    # The evaluation on the same adversarial samples must not be lower than before
    acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_test)
    self.assertTrue(acc_adv_trained >= acc)
if args.adv_method == "rnd_fgsm": x_train = np.clip( X_train + alpha * np.sign(np.random.randn(*X_train.shape)), min_, max_) x_test = np.clip( X_test + alpha * np.sign(np.random.randn(*X_test.shape)), min_, max_) e = eps - alpha else: x_train = X_train x_test = X_test e = eps X_train_adv = adv_crafter.generate(x_val=x_train, eps=e, clip_min=min_, clip_max=max_) X_test_adv = adv_crafter.generate(x_val=x_test, eps=e, clip_min=min_, clip_max=max_) if args.save: np.save(os.path.join(SAVE_ADV, "eps%.2f_train.npy" % eps), X_train_adv) np.save(os.path.join(SAVE_ADV, "eps%.2f_test.npy" % eps), X_test_adv) else: if args.adv_method == 'deepfool': adv_crafter = DeepFool(classifier,
# Wrap the PyTorch model so that ART can train and attack it
mnist_classifier = PyTorchClassifier(
    clip_values=(0, 1),
    model=model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

# Train the classifier
mnist_classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)

# Accuracy on the clean test set: share of samples whose arg-max prediction equals the arg-max label
predictions = mnist_classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy before attack: {}%'.format(accuracy * 100))

# start/end bracket the crafting and saving of the adversarial examples
start = time.time()

# Craft the adversarial examples (test set first, then train set)
epsilon = 0.2  # Maximum perturbation
adv_crafter = FastGradientMethod(mnist_classifier, eps=epsilon)
x_test_adv = adv_crafter.generate(x=x_test)
x_train_adv = adv_crafter.generate(x=x_train)

# Persist both adversarial sets under tensors/
torch.save(x_test_adv, 'tensors/x_test_adv.pt')
torch.save(x_train_adv, 'tensors/x_train_adv.pt')

end = time.time()

# Accuracy on the adversarial test set
predictions = mnist_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy after attack: {}%'.format(accuracy * 100))

# Save the classifier as 'mnist_fgsm_state_dict' in 'models'
mnist_classifier.save('mnist_fgsm_state_dict', 'models')

# print((mnist_classifier))
# torch.save(model.state_dict(), 'models/mnist_fgsm_state_dict')
# torch.save(x_test, 'tensors/test_imgs_mnist.pt')
def test_binary_input_detector(self):
    """
    Test the binary input detector end-to-end.

    A small CNN is trained on MNIST and attacked with FGM. A second CNN with two outputs is then trained to
    separate clean samples (label [1, 0]) from adversarial ones (label [0, 1]). The test passes when at
    least one adversarial test sample is flagged and at least one clean test sample is not.

    :return:
    """
    # Initialize a tf session
    session = tf.Session()
    k.set_session(session)

    # Get MNIST
    nb_train, nb_test = 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    input_shape = x_train.shape[1:]
    nb_classes = 10

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Create classifier and train it:
    classifier = KerasClassifier((0, 1), model, use_logits=False)
    classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    # Generate adversarial samples:
    attacker = FastGradientMethod(classifier, eps=0.1)
    x_train_clean = x_train[:nb_train]
    x_train_adv = attacker.generate(x_train_clean)
    x_test_adv = attacker.generate(x_test[:nb_test])

    # Compile training data for detector. Label counts follow the real sample counts: x_train is capped by
    # both NB_TRAIN and nb_train, so a fixed nb_train labels per class could disagree with the samples.
    x_train_detector = np.concatenate((x_train_clean, x_train_adv), axis=0)
    y_train_detector = np.concatenate((np.array([[1, 0]] * x_train_clean.shape[0]),
                                       np.array([[0, 1]] * x_train_adv.shape[0])), axis=0)

    # Create a simple CNN for the detector.
    # Note: we use the same architecture as for the classifier, except for the number of outputs (=2)
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(2, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Create detector and train it:
    detector = BinaryInputDetector(KerasClassifier((0, 1), model, use_logits=False))
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data:
    test_detection = np.argmax(detector(x_test), axis=1)
    test_adv_detection = np.argmax(detector(x_test_adv), axis=1)

    # Assert there is at least one true positive and negative:
    nb_true_positives = len(np.where(test_adv_detection == 1)[0])
    nb_true_negatives = len(np.where(test_detection == 0)[0])
    self.assertTrue(nb_true_positives > 0)
    self.assertTrue(nb_true_negatives > 0)
from art.utils import load_dataset

# Get session
session = tf.Session()
k.set_session(session)

# Read MNIST dataset; min_/max_ are the clipping bounds later handed to the attack
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')
im_shape = x_train[0].shape

# Construct a convolutional neural network
comp_params = {'loss': 'categorical_crossentropy',
               'optimizer': 'adam',
               'metrics': ['accuracy']}
classifier = CNN(im_shape, act='relu', dataset='mnist')
classifier.compile(comp_params)
classifier.fit(x_train, y_train, validation_split=.1, epochs=5, batch_size=128)

# Evaluate the classifier on the test set.
# scores[0] is printed as the loss and scores[1] as the accuracy. The loss is a raw value, so it is
# printed without the percent sign it used to carry.
scores = classifier.evaluate(x_test, y_test)
print("\nTest loss: %.2f\nTest accuracy: %.2f%%" % (scores[0], scores[1] * 100))

# Craft adversarial samples with FGSM
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier, sess=session)
x_test_adv = adv_crafter.generate(x_val=x_test, eps=epsilon, clip_min=min_, clip_max=max_)

# Evaluate the classifier on the adversarial examples
scores = classifier.evaluate(x_test_adv, y_test)
print("\nTest loss: %.2f\nTest accuracy: %.2f%%" % (scores[0], scores[1] * 100))
metrics=['accuracy']) classifier = KerasClassifier((min_, max_), model=model) classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy: %.2f%%" % (acc * 100)) # Craft adversarial samples ################ FGSM; epsilon = .1 # Maximum perturbation adv_crafter = FastGradientMethod(classifier) x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100)) # grab a particular example to play wit a = (preds != np.argmax(y_test, axis=1)) nat_img = x_test[a] adv_img = x_test_adv[a] adv_nse = adv_img - nat_img adv_prd = preds[a] # compute variance and plot (some) example(s) adv_var = np.sqrt(np.var(adv_nse) / np.var(nat_img)) adv_plt = np.column_stack([ nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28), adv_img[0].reshape(28, 28)
def robustness_check(object_storage_url, object_storage_username, object_storage_password,
                     data_bucket_name, result_bucket_name, model_id,
                     feature_testset_path='processed_data/X_test.npy',
                     label_testset_path='processed_data/y_test.npy',
                     clip_values=(0, 1),
                     nb_classes=2,
                     input_shape=(1, 3, 64, 64),
                     model_class_file='model.py',
                     model_class_name='model',
                     LossFn='',
                     Optimizer='',
                     epsilon=0.2):
    """
    Download a PyTorch model and test set from object storage and measure its robustness against FGSM.

    Files are fetched into the current working directory: the test features and labels from
    `data_bucket_name`, the weights (`<model_id>/model.pt`) and the zipped model source
    (`<model_id>/_submitted_code/model.zip`) from `result_bucket_name`. The zip is extracted into
    `model_files/` and the model class is imported from there.

    :param object_storage_url: Object storage endpoint; a leading http(s):// scheme is stripped.
    :param object_storage_username: Access key for the object storage.
    :param object_storage_password: Secret key for the object storage.
    :param data_bucket_name: Bucket holding the test set.
    :param result_bucket_name: Bucket holding the trained model artefacts.
    :param model_id: Prefix of the model artefacts inside `result_bucket_name`.
    :param feature_testset_path: Object path of the test features (.npy).
    :param label_testset_path: Object path of the test labels (.npy).
    :param clip_values: Passed to `PyTorchClassifier`.
    :param nb_classes: Passed to `PyTorchClassifier`.
    :param input_shape: Passed to `PyTorchClassifier`.
    :param model_class_file: File inside the zip that defines the model class.
    :param model_class_name: Name of the model class in that file.
    :param LossFn: Optional Python expression (string) evaluated to build the loss function.
    :param Optimizer: Optional Python expression (string) evaluated to build the optimizer.
    :param epsilon: FGSM perturbation budget.
    :return: The metrics object returned first by `get_metrics`.
    """
    url = re.compile(r"https?://")
    cos = Minio(url.sub('', object_storage_url),
                access_key=object_storage_username,
                secret_key=object_storage_password)

    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + '/_submitted_code/model.zip'

    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + '/' + weights_filename, weights_filename)
    cos.fget_object(result_bucket_name, model_files, 'model.zip')

    # Load PyTorch model definition from the source code.
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile('model.zip', 'r') as zip_ref:
        zip_ref.extractall('model_files')

    # Module names cannot contain dashes, so they are mapped to underscores.
    modulename = 'model_files.' + model_class_file.split('.')[0].replace('-', '_')

    # We required users to define where the model class is located or follow
    # some naming convention we have provided.
    model_class = getattr(importlib.import_module(modulename), model_class_name)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))

    # Define Loss and optimizer function for the PyTorch model.
    # SECURITY NOTE(review): eval() executes arbitrary code. LossFn and Optimizer must only ever come from
    # a trusted caller. The calls stay inline on purpose: the expressions are evaluated in this function's
    # scope and may reference local names such as `model`.
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # create pytorch classifier
    classifier = PyTorchClassifier(clip_values, model, loss_fn, optimizer, input_shape, nb_classes)

    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics, _, _ = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)
    return metrics
input_shape=x_train.shape[1:])) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(128, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(10, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) classifier = KerasClassifier((min_, max_), model=model) classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy: %.2f%%" % (acc * 100)) # Craft adversarial samples with FGSM epsilon = .1 # Maximum perturbation adv_crafter = FastGradientMethod(classifier) x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon) # Evaluate the classifier on the adversarial examples scores = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
def main(argv):
    """Craft FGSM adversarial samples for a stored Keras model and report metrics.

    Options are read from ``argv`` as ``--name value`` pairs starting at
    ``argv[1]``:

    * ``--data``: dataset file name, resolved against ``$DATA_DIR``
    * ``--networkdefinition``: model JSON file name, resolved against ``$DATA_DIR``
    * ``--weights``: weights file name, resolved against ``$DATA_DIR``
    * ``--epsilon``: FGSM perturbation size (float, required)

    The file names are stored in the module-level globals
    ``dataset_filename``, ``network_definition_filename`` and
    ``weights_filename``.  The metrics are written as JSON to
    ``$RESULT_DIR/report.txt`` and the original/adversarial samples are saved
    with ``np.savez`` under ``$RESULT_DIR/adv_samples``.

    :param argv: command-line argument list (``argv[0]`` is the program name)
    :raises SystemExit: if too few arguments are given or ``--epsilon`` is missing
    """
    global network_definition_filename, weights_filename, dataset_filename

    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    epsilon = None

    # Walk over every complete "--name value" pair instead of assuming that
    # exactly four pairs (argv[1..8]) were supplied.
    for i in range(1, len(argv) - 1, 2):
        arg = str(argv[i])
        print(arg)

        if arg == "--data":
            dataset_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        elif arg == "--networkdefinition":
            network_definition_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        elif arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        elif arg == "--epsilon":
            epsilon = float(argv[i + 1])

    # Fail early with a readable message rather than with an unbound-local
    # error once the attack is about to be created.
    if epsilon is None:
        sys.exit("Missing required argument: --epsilon <float>.")

    print("dataset : ", dataset_filename)
    print("network definition : ", network_definition_filename)
    print("weights : ", weights_filename)

    # load & compile model
    with open(network_definition_filename, 'r') as json_file:
        model_json = json_file.read()
    model = model_from_json(model_json)
    model.load_weights(weights_filename)
    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    model.compile(**comp_params)

    # create keras classifier
    classifier = KerasClassifier((0, 1), model)

    # load data set; the archive is closed as soon as both arrays are read
    with np.load(dataset_filename) as pf:
        x = pf['x_test']
        y = pf['y_test']

    # pre-process numpy array: add a trailing axis, scale to [0, 1] and
    # one-hot encode the labels over 10 classes
    x = np.expand_dims(x, axis=3)
    x = x.astype('float32') / 255
    y = np_utils.to_categorical(y, 10)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics = get_metrics(model, x, x_samples, y)
    print("metrics : ", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")
    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], 'adv_samples')
    print("adversarial samples saved to : ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
# accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
# print('Accuracy after PGD-20 attack: {}%'.format(accuracy * 100))

# PGD-100
adv_crafter_pgd_100 = ProjectedGradientDescent(mnist_classifier, max_iter=100, batch_size=batch_size)
x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array)

# Test the classifier on adversarial examples
predictions = mnist_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100))

# FGSM
adv_crafter_fgsm = FastGradientMethod(mnist_classifier, eps=epsilon, batch_size=batch_size)
x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

# Test the classifier on adversarial examples
predictions = mnist_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

# C&W L-inf
# NOTE(review): this section used to be labelled "DeepFool", yet the attack it
# runs is CarliniLInfMethod.  The reported label now names the attack that is
# actually executed; the variable keeps its historical name in case later code
# refers to it.  If DeepFool was the intended attack, swap the attack class
# instead -- please confirm.
adv_crafter_deepfool = CarliniLInfMethod(mnist_classifier, batch_size=batch_size)
x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)

# Test the classifier on adversarial examples
predictions = mnist_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy after C&W L-inf attack: {}%'.format(accuracy * 100))

# C&W
def _test_backend_mnist(self, classifier):
    """Attack ``classifier`` with FGSM on the MNIST fixture and check the effect.

    Three scenarios are exercised: the np.inf-norm attack (train and test
    data), the minimal-perturbation variant, and the L1 / L2 norm variants
    (test data only).  Each scenario asserts that the adversarial samples
    differ from their sources and that the predicted labels are not all
    equal to the true labels, then logs the resulting accuracy.

    :param classifier: classifier to attack; it is handed to
        ``FastGradientMethod`` and queried through ``predict``
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    def predicted_labels(samples):
        # Label array derived from the classifier output for `samples`.
        return get_labels_np_array(classifier.predict(samples))

    def accuracy(y_pred, y_true):
        # Share of rows whose arg-max class matches between prediction and truth.
        hits = np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1)
        return np.sum(hits) / y_true.shape[0]

    # --- FGSM with np.inf norm -------------------------------------------
    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test, batch_size=2)
    x_train_adv = attack.generate(x_train, batch_size=4)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = predicted_labels(x_train_adv)
    test_y_pred = predicted_labels(x_test_adv)

    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = accuracy(train_y_pred, y_train)
    logger.info('Accuracy on adversarial train examples: %.2f%%', (acc * 100))

    acc = accuracy(test_y_pred, y_test)
    logger.info('Accuracy on adversarial test examples: %.2f%%', (acc * 100))

    # --- Minimal perturbations -------------------------------------------
    x_train_adv_min = attack.generate(x_train, minimal=True, eps_step=.1, eps_max=1.)
    x_test_adv_min = attack.generate(x_test, minimal=True, eps_step=.1, eps_max=1.)

    # The minimal variant must differ from the fixed-eps result and from the
    # clean data.
    self.assertFalse((x_train_adv_min == x_train_adv).all())
    self.assertFalse((x_test_adv_min == x_test_adv).all())

    self.assertFalse((x_train == x_train_adv_min).all())
    self.assertFalse((x_test == x_test_adv_min).all())

    train_y_pred = predicted_labels(x_train_adv_min)
    test_y_pred = predicted_labels(x_test_adv_min)

    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = accuracy(train_y_pred, y_train)
    logger.info(
        'Accuracy on adversarial train examples with minimal perturbation: %.2f%%',
        (acc * 100))

    acc = accuracy(test_y_pred, y_test)
    logger.info(
        'Accuracy on adversarial test examples with minimal perturbation: %.2f%%',
        (acc * 100))

    # --- L_1 and L_2 norms (test data only) ------------------------------
    norm_cases = (
        (1, 'Accuracy on adversarial test examples with L1 norm: %.2f%%'),
        (2, 'Accuracy on adversarial test examples with L2 norm: %.2f%%'),
    )
    for norm, message in norm_cases:
        attack = FastGradientMethod(classifier, eps=1, norm=norm)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = predicted_labels(x_test_adv)
        self.assertFalse((y_test == test_y_pred).all())

        acc = accuracy(test_y_pred, y_test)
        logger.info(message, (acc * 100))
def _write_result_row(ws, row, attack_name, params_label, test_acc, adv_acc, flag, column_i):
    """Write one result row (attack name, parameter label, accuracies) to ``ws``."""
    ws.write(row, 0, attack_name)
    ws.write(row, 1, params_label)
    if flag in ("ori", "adv"):
        # Both flags use the same fixed accuracy columns.
        ws.write(row, 3, test_acc)
        ws.write(row, 4, adv_acc)
    else:
        # Any other flag places the pair in a column group chosen by column_i.
        ws.write(row, 0 + 3 * column_i, test_acc)
        ws.write(row, 1 + 3 * column_i, adv_acc)


def _mean_targeted_accuracy(adv_crafter, model, x_subset, y_subset,
                            confidence_value, max_iter_value, nb_classes=10):
    """Average ``model.evaluate(...)[1]`` over targeted attacks towards every class."""
    sum_adv_acc = 0
    for adv_label in range(nb_classes):
        one_hot_label = [0] * nb_classes
        one_hot_label[adv_label] = 1
        # Every sample is pushed towards the same target class.
        targets = np.array([one_hot_label] * x_subset.shape[0])
        x_test_adv = adv_crafter.generate(
            x=x_subset,
            confidence=confidence_value,
            targeted=True,
            max_iter=max_iter_value,
            y=targets)
        sum_adv_acc += model.evaluate(x_test_adv, y_subset, verbose=0)[1]
    return sum_adv_acc / nb_classes


def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws,
               current_line, attack_name, flag, column_i):
    """Run one attack family over a parameter grid and record the results in ``ws``.

    For every parameter combination of the selected attack, adversarial
    samples are crafted from ``x_test[classify_idx_lst]``, the model is
    evaluated on them and one row is written to the worksheet.  Supported
    attack names are "FGM", "BIM", "JSMA", "DeepFool", "CW-L2" and "CW-Linf";
    any other name writes nothing.  In every case ``current_line`` is advanced
    by one extra row before returning.

    :param x_test: test inputs, indexed with ``classify_idx_lst``
    :param y_test: test labels, indexed with ``classify_idx_lst``
    :param classify_idx_lst: index selecting the samples to attack
    :param model: Keras model; wrapped in a ``KerasClassifier`` and also used
        directly through ``model.evaluate``
    :param test_acc: accuracy value written next to every adversarial accuracy
    :param ws: worksheet-like object exposing ``write(row, col, value)``
    :param current_line: first row to write to
    :param attack_name: name of the attack family to run
    :param flag: "ori" or "adv" write accuracies to columns 3 and 4; any other
        value writes them to columns ``3 * column_i`` and ``3 * column_i + 1``
    :param column_i: column group index used when ``flag`` is neither "ori"
        nor "adv"
    :return: tuple ``(ws, current_line)`` with the next free row
    """
    classifier = KerasClassifier((0., 1.), model=model)

    # The attacked subset is the same for every parameter combination, so it
    # is selected once instead of once per crafted batch.
    x_subset = x_test[classify_idx_lst]
    y_subset = y_test[classify_idx_lst]

    def adversarial_accuracy(x_adv):
        # Second entry of model.evaluate's output -- presumably the accuracy
        # metric the model was compiled with; confirm against the caller.
        return model.evaluate(x_adv, y_subset, verbose=0)[1]

    def write_row(row, params_label, adv_acc):
        _write_result_row(ws, row, attack_name, params_label, test_acc, adv_acc, flag, column_i)

    if attack_name == "FGM":
        # Parameter settings: [maximum perturbation, order of the norm]
        parameter_lst = (
            [[eps, 1] for eps in range(10, 101, 10)]
            + [[eps, 2] for eps in range(1, 11)]
            + [[eps, np.inf] for eps in (0.05, 0.10, 0.15, 0.20, 0.25,
                                         0.30, 0.35, 0.40, 0.45, 0.50)]
        )
        # Run the attack
        for epsilon, norm_type in parameter_lst:
            adv_crafter = FastGradientMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_subset, eps=epsilon, norm=norm_type)
            acc = adversarial_accuracy(x_test_adv)
            write_row(current_line, "(%s, %s)" % (round(epsilon, 4), norm_type), acc)
            current_line += 1

    elif attack_name == "BIM":
        # Parameter settings: [order of the norm, maximum perturbation,
        # attack step size per iteration, maximum number of iterations]
        norm_settings = (
            (1, 20.0, (2.0, 4.0, 6.0, 8.0, 10.0)),
            (2, 2.0, (0.2, 0.4, 0.6, 0.8, 1.0)),
            (np.inf, 0.1, (0.002, 0.004, 0.006, 0.008, 0.010)),
        )
        parameter_lst = [
            [norm, eps, step, iterations]
            for norm, eps, steps in norm_settings
            for iterations in (10, 50)
            for step in steps
        ]
        # Run the attack
        for norm_type, epsilon, epsilon_step, max_iteration in parameter_lst:
            adv_crafter = BasicIterativeMethod(classifier)
            x_test_adv = adv_crafter.generate(
                x=x_subset,
                norm=norm_type,
                eps=epsilon,
                eps_step=epsilon_step,
                max_iter=max_iteration)
            acc = adversarial_accuracy(x_test_adv)
            label = "(%s, %s, %s, %s)" % (
                norm_type, round(epsilon, 4), round(epsilon_step, 4), max_iteration)
            write_row(current_line, label, acc)
            current_line += 1

    elif attack_name == "JSMA":
        # Parameter settings: [perturbation applied to each modified feature
        # per step (positive or negative), maximum fraction of perturbed
        # features (between 0 and 1)]
        parameter_lst = [
            [theta, 0.5]
            for theta in (0.5, 0.4, 0.3, 0.2, 0.1, -0.1, -0.2, -0.3, -0.4, -0.5)
        ]
        # Run the attack
        for theta, gamma in parameter_lst:
            adv_crafter = SaliencyMapMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_subset, theta=theta, gamma=gamma)
            acc = adversarial_accuracy(x_test_adv)
            write_row(current_line, "(%s, %s)" % (round(theta, 4), round(gamma, 4)), acc)
            current_line += 1

    elif attack_name == "DeepFool":
        # Parameter settings: [maximum number of iterations, overshoot parameter]
        parameter_lst = [[iterations, 0.10] for iterations in range(2, 21, 2)]
        # Run the attack
        for max_iteration, epsilon in parameter_lst:
            adv_crafter = DeepFool(classifier)
            x_test_adv = adv_crafter.generate(x=x_subset, max_iter=max_iteration, epsilon=epsilon)
            acc = adversarial_accuracy(x_test_adv)
            write_row(current_line, "(%s, %s)" % (max_iteration, round(epsilon, 4)), acc)
            current_line += 1

    elif attack_name in ("CW-L2", "CW-Linf"):
        # Parameter settings: [confidence, maximum number of iterations].
        # A higher confidence produces examples that are farther away from
        # the original input, but classified with higher confidence as the
        # target class.
        parameter_lst = [[0, iterations] for iterations in range(1, 6)]
        crafter_class = CarliniL2Method if attack_name == "CW-L2" else CarliniLInfMethod
        # Run the attack; the recorded value is the mean over all target classes
        for confidence_value, max_iter_value in parameter_lst:
            adv_crafter = crafter_class(classifier)
            mean_acc = _mean_targeted_accuracy(
                adv_crafter, model, x_subset, y_subset, confidence_value, max_iter_value)
            write_row(
                current_line,
                "(%s, %s)" % (round(confidence_value, 4), max_iter_value),
                mean_acc)
            current_line += 1

    # Leave one spacer row after this attack's results.
    current_line += 1
    return ws, current_line
input_shape=x_train.shape[1:])) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(128, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(10, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) classifier = KerasClassifier(model=model, clip_values=(min_, max_)) classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy: %.2f%%" % (acc * 100)) # Craft adversarial samples with FGSM epsilon = .1 # Maximum perturbation adv_crafter = FastGradientMethod(classifier, eps=epsilon) x_test_adv = adv_crafter.generate(x=x_test) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
def attack(self, model=None, attack_str=""):
    """Return adversarial examples for the configured attack, crafting them if needed.

    Previously stored examples are looked up first via ``self._load_images``;
    if any are found they are returned unchanged.  Otherwise the examples are
    crafted from the test or validation inputs (selected by
    ``self._test_or_val_dataset``), restricted to ``self.idx_adv`` and
    truncated to ``self._length`` samples, pickled under ``self._attack_dir``
    and returned.

    :param model: Keras model to attack.  When ``None``, ``self.surrogate_model``
        is trained on the training data and attacked instead.
    :param attack_str: attack identifier forwarded to ``self._load_images``
    :return: the loaded or newly crafted adversarial examples
    :raises ValueError: if the configured attack is not one of FGSM, CW*, BIM
        or DEEPFOOL, or if BIM is requested for a dataset other than MNIST or
        CIFAR
    """
    imgs = self._load_images(attack_str, self._test_or_val_dataset)
    if imgs is not None:
        print('\n{0} adversarial examples using {1} attack loaded...\n'.
              format(self.__dataset, self.__attack))
        return imgs

    # Validate the configuration before any (expensive) surrogate training,
    # so a typo fails immediately with a readable error instead of an
    # unbound-local error after the training run.
    attack_name = self.__attack
    is_cw = attack_name.startswith("CW")
    if not is_cw and attack_name not in ('FGSM', 'BIM', 'DEEPFOOL'):
        raise ValueError("Unsupported attack: {0!r}".format(attack_name))
    if attack_name == 'BIM' and self.__dataset not in ('MNIST', 'CIFAR'):
        raise ValueError(
            "BIM attack is only configured for MNIST and CIFAR, got {0!r}".format(self.__dataset))

    if self._test_or_val_dataset == "_x_test_set_":
        X = self.__data.x_test
    else:
        X = self.__data.x_val

    if model is None:
        # No model supplied: train the surrogate and attack that instead.
        self.surrogate_model.fit(self.__data.x_train,
                                 self.__data.y_train,
                                 verbose=1,
                                 epochs=self.__epochs,
                                 batch_size=128)
        model = self.surrogate_model
    wrap = KerasClassifier((0., 1.), model=model)

    # Clean samples every attack starts from.
    x_clean = X[self.idx_adv][:self._length]

    if attack_name == 'FGSM':
        print('\nCrafting adversarial examples using FGSM attack...\n')
        fgsm = FastGradientMethod(wrap)
        eps = 0.2 if self.__data.dataset_name == 'MNIST' else 0.025
        x_adv_images = fgsm.generate(x=x_clean, eps=eps)
        file_suffix = "fgsm.pkl"

    elif is_cw:
        print('\nCrafting adversarial examples using CW attack...\n')
        cw = CarliniL2Method(wrap,
                             confidence=0.0,
                             targeted=False,
                             binary_search_steps=1,
                             learning_rate=0.2,
                             initial_const=10,
                             max_iter=100)
        x_adv_images = cw.generate(x_clean)
        file_suffix = "cw.pkl"

    elif attack_name == 'BIM':
        print('\nCrafting adversarial examples using BIM attack...\n')
        if self.__dataset == 'MNIST':
            bim = BasicIterativeMethod(wrap,
                                       eps=0.25,
                                       eps_step=0.2,
                                       max_iter=100,
                                       norm=np.inf)
        else:  # 'CIFAR' -- guaranteed by the validation above
            bim = BasicIterativeMethod(wrap,
                                       eps=0.025,
                                       eps_step=0.01,
                                       max_iter=1000,
                                       norm=np.inf)
        x_adv_images = bim.generate(x=x_clean)
        file_suffix = "bim.pkl"

    else:  # 'DEEPFOOL' -- guaranteed by the validation above
        print('\nCrafting adversarial examples using DeepFool attack...\n')
        deepfool = DeepFool(wrap)
        x_adv_images = deepfool.generate(x=x_clean)
        file_suffix = "deepfool.pkl"

    # Store the crafted examples so later calls can load them.
    path = os.path.join(
        self._attack_dir,
        self.__dataset.lower() + self._test_or_val_dataset + file_suffix)
    helpers.save_pkl(x_adv_images, path)

    return x_adv_images