def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the
    attack are two CNNs on MNIST trained for 2 epochs. FGSM and DeepFool both generate the attack images on
    the same source classifier. The test checks whether accuracy on adversarial samples increases after
    adversarially training the model.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # The source classifier crafts the attacks; the target classifier is adversarially trained
    target = self.classifier_k
    source = self.classifier_tf

    # Build both attackers on the source model
    fgsm = FastGradientMethod(source)
    deepfool = DeepFool(source)

    # Stack adversarial samples from both attacks; labels are duplicated accordingly
    x_adv = np.vstack((fgsm.generate(x_test), deepfool.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))

    # Baseline accuracy of the target model on the adversarial samples
    baseline_preds = target.predict(x_adv)
    acc = np.sum(np.argmax(baseline_preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

    # Perform adversarial training with both attacks
    trainer = StaticAdversarialTrainer(target, [fgsm, deepfool])
    trainer.fit(x_train, y_train, nb_epochs=2, batch_size=BATCH_SIZE)

    # Accuracy of the hardened model on the same adversarial samples
    trained_preds = trainer.classifier.predict(x_adv)
    acc_adv_trained = np.sum(np.argmax(trained_preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
def test_multi_attack_mnist_with_generator(self):
    """
    Test the adversarial trainer using two attackers (FGSM and DeepFool), where a generator is used to yield
    the data for adversarial training. The source and target models are two CNNs on MNIST trained for 2
    epochs; both attacks craft their images on the same source classifier.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_original = x_train.copy()

    class MyDataGenerator(DataGenerator):
        # Minimal generator yielding random mini-batches without replacement
        def __init__(self, x, y, size, batch_size):
            self.x = x
            self.y = y
            self.size = size
            self.batch_size = batch_size

        def get_batch(self):
            ids = np.random.choice(self.size, size=min(self.size, self.batch_size), replace=False)
            return self.x[ids], self.y[ids]

    generator = MyDataGenerator(x_train, y_train, x_train.shape[0], BATCH_SIZE)

    # Source classifier crafts the attacks; target classifier is trained
    target = self.classifier_k
    source = self.classifier_tf

    fgsm = FastGradientMethod(source)
    deepfool = DeepFool(source)

    x_adv = np.vstack((fgsm.generate(x_test), deepfool.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))

    # Baseline accuracy on the adversarial samples before training
    acc = np.sum(np.argmax(target.predict(x_adv), axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

    # Adversarial training driven by the generator
    trainer = StaticAdversarialTrainer(target, [fgsm, deepfool])
    trainer.fit_generator(generator, nb_epochs=2)

    trained_preds = trainer.classifier.predict(x_adv)
    acc_adv_trained = np.sum(np.argmax(trained_preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))

    # Finally assert that the original training data hasn't changed:
    self.assertTrue((x_train == x_train_original).all())
def deep_fool(x_test, model, max_iter, epsilon, nb_grads, batch_size):
    """Craft DeepFool adversarial examples for `x_test` and return them reshaped to a single (32, 32, 3) image.

    :param x_test: input image batch (expected to hold exactly one 32x32x3 image).
    :param model: Keras model to wrap and attack.
    :param max_iter: DeepFool iteration cap.
    :param epsilon: DeepFool overshoot term.
    :param nb_grads: number of class gradients considered.
    :param batch_size: crafting batch size.
    :return: the adversarial image as a (32, 32, 3) array.
    """
    wrapped = KerasClassifier(model=model, clip_values=(0, 1))
    # NOTE: local was previously (misleadingly) named `attack_cw`; this is DeepFool, not Carlini-Wagner
    attack = DeepFool(classifier=wrapped, max_iter=max_iter, epsilon=epsilon,
                      nb_grads=nb_grads, batch_size=batch_size)
    adversarial = attack.generate(x_test)
    return np.reshape(adversarial, (32, 32, 3))
def atk_DeepFool(x_train, x_test, y_train, y_test, classifier):
    """Run DeepFool on both data splits, evaluate the classifier on the results, and return the adversarial sets.

    :return: (x_test_adv, x_train_adv) — note the test split is returned first.
    """
    crafter = DeepFool(classifier, max_iter=20)
    x_train_adv = crafter.generate(x_train)
    x_test_adv = crafter.generate(x_test)
    print("After DeepFool Attack \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv
def test_pytorch_mnist(self):
    """End-to-end DeepFool check against a small PyTorch CNN on MNIST."""
    # PyTorch expects NCHW float32 input
    x_train = np.reshape(self.x_train_mnist, (self.x_train_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_original = x_test.copy()

    # Create basic PyTorch model
    classifier = get_image_classifier_pt(from_logits=True)

    train_scores = get_labels_np_array(classifier.predict(x_train))
    accuracy = np.sum(np.argmax(train_scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

    test_scores = get_labels_np_array(classifier.predict(x_test))
    accuracy = np.sum(np.argmax(test_scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # The attack must have perturbed the inputs ...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    # ... and changed at least one prediction on each split
    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_partial_grads(self):
    """DeepFool restricted to the top `nb_grads` class gradients should still fool the Keras classifier."""
    attack = DeepFool(self.classifier_k, max_iter=2, nb_grads=3)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == x_test_adv).all())

    test_y_pred = get_labels_np_array(self.classifier_k.predict(x_test_adv))
    self.assertFalse((self.y_test == test_y_pred).all())

    hits = np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test, axis=1)
    accuracy = np.sum(hits) / self.y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', (accuracy * 100))
def test_tensorflow_mnist(self):
    """End-to-end DeepFool check against a small TensorFlow CNN on MNIST."""
    x_test_original = self.x_test_mnist.copy()

    # Create basic CNN on MNIST using TensorFlow; keep the session handle so it stays alive
    classifier, sess = get_image_classifier_tf(from_logits=True)

    scores = get_labels_np_array(classifier.predict(self.x_train_mnist))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("[TF, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

    scores = get_labels_np_array(classifier.predict(self.x_test_mnist))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("[TF, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train_mnist)
    x_test_adv = attack.generate(self.x_test_mnist)

    # The attack must have perturbed the inputs ...
    self.assertFalse((self.x_train_mnist == x_train_adv).all())
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    # ... and changed at least one prediction on each split
    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
def test_iris_k_unbounded(self):
    """DeepFool on Iris with a Keras classifier that has no clip values (unbounded perturbations)."""
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack = DeepFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == x_test_adv).all())

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(self.y_test, axis=1)
    # At least one sample must be misclassified
    self.assertFalse((true_labels == predictions_adv).all())

    accuracy = np.sum(predictions_adv == true_labels) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_pt(self):
    """DeepFool on Iris with a PyTorch classifier: samples must be perturbed yet stay inside [0, 1]."""
    classifier = get_iris_classifier_pt()

    attack = DeepFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == x_test_adv).all())
    # Clip values must be respected
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(self.y_test, axis=1)
    self.assertFalse((true_labels == predictions_adv).all())

    accuracy = np.sum(predictions_adv == true_labels) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
def test_kera_mnist_partial_grads(self):
    """DeepFool using only the top-3 class gradients against the Keras MNIST classifier."""
    classifier = get_image_classifier_kr(from_logits=True)
    attack = DeepFool(classifier, max_iter=2, nb_grads=3)
    x_test_adv = attack.generate(self.x_test_mnist)
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    nb_correct = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1))
    accuracy = nb_correct / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))
def __init__(self, model, overshoot=1e-6, max_iterations=100, n_candidates=10, batch_size=16):
    """Wrap ART's DeepFool behind this attack interface.

    :param model: classifier under attack.
    :param overshoot: DeepFool epsilon overshoot term.
    :param max_iterations: per-sample iteration cap.
    :param n_candidates: number of class gradients DeepFool considers.
    :param batch_size: crafting batch size.
    """
    super().__init__(model=model)
    self._overshoot = overshoot
    self._max_iterations = max_iterations
    self._n_candidates = n_candidates
    self._method = DeepFool(
        classifier=self.model,
        epsilon=self._overshoot,
        max_iter=self._max_iterations,
        nb_grads=self._n_candidates,
        batch_size=batch_size,
    )
def test_two_attacks(self):
    """Adversarially train with FGSM + DeepFool; accuracy on FGSM samples lands at known exact values."""
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()

    fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)
    x_test_adv = fgsm.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    acc_before = np.sum(np.argmax(self.classifier.predict(x_test_adv), axis=1) == labels) / NB_TEST

    trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)
    acc_after = np.sum(np.argmax(trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST

    # Deterministic fixtures: exact accuracies are expected
    self.assertEqual(acc_after, 0.36)
    self.assertEqual(acc_before, 0.13)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_two_attacks_with_generator(self):
    """Adversarial training with FGSM + DeepFool where the data is fed through a DataGenerator."""
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_original = x_train.copy()

    class MyDataGenerator(DataGenerator):
        # Random mini-batch sampler over a fixed dataset
        def __init__(self, x, y, size, batch_size):
            self.x = x
            self.y = y
            self.size = size
            self.batch_size = batch_size

        def get_batch(self):
            ids = np.random.choice(self.size, size=min(self.size, self.batch_size), replace=False)
            return self.x[ids], self.y[ids]

    generator = MyDataGenerator(x_train, y_train, x_train.shape[0], 128)

    fgsm = FastGradientMethod(self.classifier_k)
    deepfool = DeepFool(self.classifier_tf)
    x_test_adv = fgsm.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    acc = np.sum(np.argmax(self.classifier_k.predict(x_test_adv), axis=1) == labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
    adv_trainer.fit_generator(generator, nb_epochs=5)
    acc_new = np.sum(np.argmax(adv_trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST

    # No reason to assert the newer accuracy is higher. It might go down slightly
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

    # Finally assert that the original training data hasn't changed:
    self.assertTrue((x_train == x_train_original).all())
def test_keras_mnist(self):
    """
    End-to-end DeepFool check against a small Keras CNN on MNIST.

    Asserts that the attack perturbs both splits, flips at least one prediction on each, and leaves the
    original test data untouched.
    """
    x_test_original = self.x_test_mnist.copy()

    # Keras classifier
    classifier = get_image_classifier_kr(from_logits=True)

    scores = classifier._model.evaluate(self.x_train_mnist, self.y_train_mnist)
    logger.info("[Keras, MNIST] Accuracy on training set: %.2f%%", (scores[1] * 100))
    scores = classifier._model.evaluate(self.x_test_mnist, self.y_test_mnist)
    logger.info("[Keras, MNIST] Accuracy on test set: %.2f%%", (scores[1] * 100))

    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train_mnist)
    x_test_adv = attack.generate(self.x_test_mnist)

    self.assertFalse((self.x_train_mnist == x_train_adv).all())
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    # Renamed from `sum`, which shadowed the builtin
    nb_correct_train = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1))
    accuracy = nb_correct_train / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

    nb_correct_test = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1))
    accuracy = nb_correct_test / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
def test_keras_iris_clipped(self):
    """DeepFool on Iris with a clipped Keras classifier: adversarial samples must stay within [0, 1]."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_kr()

    attack = DeepFool(classifier, max_iter=5)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    # Clip values must be respected
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == predictions_adv).all())

    accuracy = np.sum(predictions_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
def test_transfer(self):
    """Adversarially train the Keras model on DeepFool samples transferred from the TF source model."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    attack = DeepFool(self.classifier_tf)
    x_test_adv = attack.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    acc = np.sum(np.argmax(self.classifier_k.predict(x_test_adv), axis=1) == labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attack)
    adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

    acc_new = np.sum(np.argmax(adv_trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST
    # Accuracy may dip slightly, but must not collapse below the tolerated drop
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))
def general_test_v2(model, optimizer, input_shape, nb_classes, test_loader, method, conf,
                    btrain=False, model_file='last_model_92_sgd.pkl'):
    """Evaluate `model` on adversarial examples crafted with the chosen attack method.

    :param model: PyTorch network to evaluate (loaded from `model_file` unless `btrain`).
    :param optimizer: optimizer handed to the ART PyTorchClassifier wrapper.
    :param input_shape: input shape of the network.
    :param nb_classes: number of output classes.
    :param test_loader: DataLoader with the clean evaluation set.
    :param method: one of 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI', 'FGSM'.
    :param conf: attack configuration forwarded to `adv_generalization`.
    :param btrain: when True, assume `model` is already trained and skip loading the checkpoint.
    :param model_file: checkpoint path holding a {'state_dict': ...} dict.
    :return: accuracy on the adversarial test images as a float in [0, 1].
    :raises ValueError: if `method` is not a supported attack name.
    """
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes,
                                     clip_values=(.0, 1.))

    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    else:
        # Previously an unknown method fell through and raised NameError on `adv_crafter` below
        raise ValueError('Unsupported attack method: {}'.format(method))

    correct, total = 0, 0
    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset, batch_size=32, shuffle=False, drop_last=True)

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    # Cast so the return value matches the printed ratio (`correct` is a torch tensor here,
    # and tensor division would otherwise return a tensor / truncate for integer dtypes)
    return float(correct) / total
def _test_backend_mnist(self, classifier):
    """Shared backend check: DeepFool must perturb MNIST inputs and flip at least one prediction."""
    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train)
    x_test_adv = attack.generate(self.x_test)

    self.assertFalse((self.x_train == x_train_adv).all())
    self.assertFalse((self.x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_train == train_y_pred).all())
    self.assertFalse((self.y_test == test_y_pred).all())

    nb_correct = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train, axis=1))
    accuracy = nb_correct / self.y_train.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', (accuracy * 100))

    nb_correct = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test, axis=1))
    accuracy = nb_correct / self.y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', (accuracy * 100))
def test_classifier_type_check_fail_classifier(self):
    """DeepFool must raise TypeError for objects that do not implement the Classifier API."""

    class ClassifierNoAPI:
        # Deliberately implements none of the Classifier interface
        pass

    classifier = ClassifierNoAPI
    with self.assertRaises(TypeError) as context:
        _ = DeepFool(classifier=classifier)

    self.assertIn('For `DeepFool` classifier must be an instance of '
                  '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
                  '(<class \'object\'>,).', str(context.exception))
def test_transfer(self):
    """Transfer variant with data-integrity check: train on DeepFool samples from the TF source model."""
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()

    attack = DeepFool(self.classifier_tf)
    x_test_adv = attack.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    acc = np.sum(np.argmax(self.classifier_k.predict(x_test_adv), axis=1) == labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attack)
    adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

    acc_new = np.sum(np.argmax(adv_trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_classifier_type_check_fail_gradients(self):
    """DeepFool needs gradients: a wrapped scikit-learn decision tree must be rejected with TypeError."""
    # Use a test classifier not providing gradients required by white-box attack
    from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
    from sklearn.tree import DecisionTreeClassifier

    tree_classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
    with self.assertRaises(TypeError) as context:
        _ = DeepFool(classifier=tree_classifier)

    self.assertIn('For `DeepFool` classifier must be an instance of '
                  '`art.classifiers.classifier.ClassifierNeuralNetwork` and '
                  '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
                  '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).', str(context.exception))
def test_two_attacks_with_generator(self):
    """FGSM + DeepFool adversarial training via fit_generator; accuracies land near known values."""
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_original = x_train.copy()
    x_test_original = x_test.copy()

    class MyDataGenerator(DataGenerator):
        # Random mini-batch sampler over a fixed dataset
        def __init__(self, x, y, size, batch_size):
            super().__init__(size=size, batch_size=batch_size)
            self.x = x
            self.y = y
            self._size = size
            self._batch_size = batch_size

        def get_batch(self):
            ids = np.random.choice(self.size, size=min(self.size, self.batch_size), replace=False)
            return self.x[ids], self.y[ids]

    generator = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)

    fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)
    x_test_adv = fgsm.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    accuracy = np.sum(np.argmax(self.classifier.predict(x_test_adv), axis=1) == labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    adv_trainer.fit_generator(generator, nb_epochs=3)
    accuracy_new = np.sum(np.argmax(adv_trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST

    self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
    self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

    # Check that x_train and x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_train_original - x_train))), 0.0, delta=0.00001)
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def general_test(model, optimizer, input_shape, nb_classes, test_loader, method,
                 btrain=False, model_file='last_model_92_sgd.pkl'):
    """Evaluate `model` on adversarial test images and report overall and per-class accuracy.

    :param model: PyTorch network to evaluate (loaded from `model_file` unless `btrain`).
    :param optimizer: optimizer handed to the ART PyTorchClassifier wrapper.
    :param input_shape: input shape of the network.
    :param nb_classes: number of output classes.
    :param test_loader: DataLoader yielding (images, labels) batches.
    :param method: one of 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI'.
    :param btrain: when True, skip loading the checkpoint.
    :param model_file: checkpoint path for `load_state_dict`.
    :return: adversarial accuracy as a float in [0, 1].
    :raises ValueError: if `method` is not a supported attack name.
    """
    global _classes
    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes,
                                     clip_values=(.0, 1.))

    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=20)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=20)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=20)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=20)
    else:
        # Previously an unknown method fell through and raised NameError on `adv_crafter` below
        raise ValueError('Unsupported attack method: {}'.format(method))

    correct, total = 0, 0
    class_correct = list(0. for _ in range(10))
    class_total = list(0. for _ in range(10))
    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())
        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        c = (predicted == labels.data).squeeze()
        # Use the actual batch size instead of the hard-coded 20: the last batch may be smaller,
        # which previously raised IndexError when the loader did not drop incomplete batches
        for i in range(labels.size(0)):
            label = labels.data[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (_classes[i], 100 * class_correct[i] / class_total[i]))
    # Cast so the return value matches the printed float ratio (`correct` is a torch tensor)
    return float(correct) / total
def test_two_attacks(self):
    """FGSM + DeepFool adversarial training; accuracy on FGSM samples must not drop below the tolerance."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    fgsm = FastGradientMethod(self.classifier_k)
    deepfool = DeepFool(self.classifier_tf)
    x_test_adv = fgsm.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    acc = np.sum(np.argmax(self.classifier_k.predict(x_test_adv), axis=1) == labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
    adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)
    acc_new = np.sum(np.argmax(adv_trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST

    # No reason to assert the newer accuracy is higher. It might go down slightly
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))
class DeepFoolAttack(AdversarialAttack):
    """AdversarialAttack adapter around ART's DeepFool crafting method."""

    def __init__(self, model, overshoot=1e-6, max_iterations=100, n_candidates=10, batch_size=16):
        """
        :param model: classifier under attack.
        :param overshoot: DeepFool epsilon overshoot term.
        :param max_iterations: per-sample iteration cap.
        :param n_candidates: number of class gradients DeepFool considers.
        :param batch_size: crafting batch size.
        """
        super().__init__(model=model)
        self._overshoot = overshoot
        self._max_iterations = max_iterations
        self._n_candidates = n_candidates
        self._method = DeepFool(
            classifier=self.model,
            epsilon=self._overshoot,
            max_iter=self._max_iterations,
            nb_grads=self._n_candidates,
            batch_size=batch_size,
        )

    def attack_method(self, x, y=None):
        """Generate adversarial examples for `x`; `y` is accepted for interface parity but unused."""
        return self._method.generate(x=x)
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    """Wrap `model` in an ART PyTorchClassifier and return the requested attack crafter.

    :param method: one of 'deepfool', 'bim', 'jsma', 'cw2', 'cwi', 'fgsm', 'pgd'.
    :param batch_size: crafting batch size (ignored by DeepFool, which uses its default).
    :param pgd_eps: epsilon used only for the 'pgd' attack.
    :raises NotImplementedError: for attack names outside the supported set.
    """
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    # Lazy factories so only the requested attack object is constructed
    factories = {
        'deepfool': lambda: DeepFool(wmodel),
        'bim': lambda: BasicIterativeMethod(wmodel, batch_size=batch_size),
        'jsma': lambda: SaliencyMapMethod(wmodel, batch_size=batch_size),
        'cw2': lambda: CarliniL2Method(wmodel, batch_size=batch_size),
        'cwi': lambda: CarliniLInfMethod(wmodel, batch_size=batch_size),
        'fgsm': lambda: FastGradientMethod(wmodel, batch_size=batch_size),
        'pgd': lambda: ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps),
    }
    if method not in factories:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))
    return factories[method]()
def test_two_attacks(self):
    """FGSM + DeepFool adversarial training variant that also verifies the test data is untouched."""
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()

    fgsm = FastGradientMethod(self.classifier_k)
    deepfool = DeepFool(self.classifier_tf)
    x_test_adv = fgsm.generate(x_test)

    labels = np.argmax(y_test, axis=1)
    acc = np.sum(np.argmax(self.classifier_k.predict(x_test_adv), axis=1) == labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
    adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)
    acc_new = np.sum(np.argmax(adv_trainer.predict(x_test_adv), axis=1) == labels) / NB_TEST

    # No reason to assert the newer accuracy is higher. It might go down slightly
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def gzsl_launch(dataloader_seen, dataloader_unseen, all_vectors, criterion, params):
    """Run a generalized zero-shot-learning (GZSL) attack/defense evaluation.

    Builds a ResNet-101 + ALE full graph, attacks both the seen- and unseen-class loaders with the
    configured attack, optionally applies an input-space defense, and prints harmonic GZSL scores.

    :param dataloader_seen: loader over seen-class samples (yields (image, label) pairs).
    :param dataloader_unseen: loader over unseen-class samples.
    :param all_vectors: class attribute/embedding vectors handed to FullGraph.
    :param criterion: loss used by the ART PyTorchClassifier wrapper.
    :param params: configuration dict (dataset, attack, defense and their hyper-parameters).
    """
    # Dataset-specific checkpoint paths
    if params["dataset"] == "CUB":
        from configs.config_CUB import MODEL_PATH, SMOOTHED_MODEL_PATH
    elif params["dataset"] == "AWA2":
        from configs.config_AWA2 import MODEL_PATH, SMOOTHED_MODEL_PATH
    elif params["dataset"] == "SUN":
        from configs.config_SUN import MODEL_PATH, SMOOTHED_MODEL_PATH
    # ImageNet-pretrained backbone; final FC layer stripped to obtain features
    resnet = torchvision.models.resnet101(pretrained=True).cuda()
    feature_extractor = nn.Sequential(*list(resnet.children())[:-1])
    # Label-smoothing defense is baked into the trained model rather than applied at input time
    if params["hasDefense"] and params["defense"] == "label_smooth":
        model_ale = torch.load(SMOOTHED_MODEL_PATH).cuda()
    else:
        model_ale = torch.load(MODEL_PATH).cuda()
    full_graph = FullGraph(feature_extractor, model_ale, all_vectors).cuda()
    full_graph.eval()
    optimizer = optim.SGD(full_graph.parameters(), lr=0.01, momentum=0.5)
    # Class counts per dataset
    if params["dataset"] == "CUB":
        no_classes = 200
    elif params["dataset"] == "AWA2":
        no_classes = 50
    elif params["dataset"] == "SUN":
        no_classes = 717
    classifier = PyTorchClassifier(model=full_graph, loss=criterion, optimizer=optimizer,
                                   input_shape=(1, 150, 150), nb_classes=no_classes)
    # Build the configured attack; batch_size also drives classifier.predict below
    if params["attack"] == "fgsm":
        batch_size = 1
        attack = FastGradientMethod(classifier=classifier, eps=params["fgsm_params"]["epsilon"],
                                    batch_size=batch_size)
    elif params["attack"] == "deepfool":
        batch_size = 1
        attack = DeepFool(classifier, max_iter=params["deepfool_params"]["max_iter"],
                          epsilon=params["deepfool_params"]["epsilon"],
                          nb_grads=params["deepfool_params"]["nb_grads_gzsl"],
                          batch_size=batch_size)
    elif params["attack"] == "carlini_wagner":
        # C&W supports batched crafting when a custom collate function is in use
        batch_size = params["batch_size"] if params["custom_collate"] else 1
        attack = CarliniL2Method(
            classifier,
            confidence=params["carliniwagner_params"]["confidence"],
            learning_rate=params["carliniwagner_params"]["learning_rate"],
            binary_search_steps=params["carliniwagner_params"]["binary_search_steps"],
            max_iter=params["carliniwagner_params"]["max_iter"],
            initial_const=params["carliniwagner_params"]["initial_const"],
            max_halving=params["carliniwagner_params"]["max_halving"],
            max_doubling=params["carliniwagner_params"]["max_doubling"],
            batch_size=batch_size)
    # Accumulators for the seen-class split
    preds_seen = []
    preds_seen_defended = []
    adv_preds_seen = []
    adv_preds_seen_defended = []
    labels_seen_ = []
    start = time.time()
    # Input-space defenses (label_smooth is handled via the model choice above)
    if params["hasDefense"]:
        if params["defense"] == "spatial_smooth":
            defense = SpatialSmoothing(window_size=params["ss_params"]["window_size"])
        elif params["defense"] == "totalvar":
            defense = TotalVarMin(max_iter=params["totalvar_params"]["max_iter"])
    # --- Seen-class loop: clean, defended, attacked, attacked+defended predictions ---
    for index, sample in enumerate(dataloader_seen):
        img = sample[0].numpy()
        label = sample[1].numpy()
        if params["clean_results"]:
            if params["hasDefense"] and params["defense"] != "label_smooth":
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(img_def, batch_size=batch_size)
                preds_seen_defended.extend(np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds_seen.extend(np.argmax(predictions, axis=1))
        img_perturbed = attack.generate(x=img)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(img_perturbed_defended, batch_size=batch_size)
            adv_preds_seen_defended.extend(np.argmax(predictions_adv_defended, axis=1))
        predictions_adv = classifier.predict(img_perturbed, batch_size=batch_size)
        adv_preds_seen.extend(np.argmax(predictions_adv, axis=1))
        labels_seen_.extend(label)
        if index % 1000 == 0:
            print(index, len(dataloader_seen))
    labels_seen_ = np.array(labels_seen_)
    adv_preds_seen = np.array(adv_preds_seen)
    adv_preds_seen_defended = np.array(adv_preds_seen_defended)
    uniq_labels_seen = np.unique(labels_seen_)
    # Accumulators for the unseen-class split
    adv_preds_unseen = []
    adv_preds_unseen_defended = []
    labels_unseen_ = []
    if params["clean_results"]:
        preds_unseen = []
        preds_seen = np.array(preds_seen)
        preds_unseen_defended = []
        preds_seen_defended = np.array(preds_seen_defended)
    # --- Unseen-class loop: mirrors the seen-class loop above ---
    for index, sample in enumerate(dataloader_unseen):
        img = sample[0].numpy()
        label = sample[1].numpy()
        if params["clean_results"]:
            if params["hasDefense"] and params["defense"] != "label_smooth":
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(img_def, batch_size=batch_size)
                preds_unseen_defended.extend(np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds_unseen.extend(np.argmax(predictions, axis=1))
        img_perturbed = attack.generate(x=img)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(img_perturbed_defended, batch_size=batch_size)
            adv_preds_unseen_defended.extend(np.argmax(predictions_adv_defended, axis=1))
        predictions_adv = classifier.predict(img_perturbed, batch_size=batch_size)
        adv_preds_unseen.extend(np.argmax(predictions_adv, axis=1))
        labels_unseen_.extend(label)
        if index % 1000 == 0:
            print(index, len(dataloader_unseen))
    end = time.time()
    labels_unseen_ = np.array(labels_unseen_)
    adv_preds_unseen = np.array(adv_preds_unseen)
    adv_preds_unseen_defended = np.array(adv_preds_unseen_defended)
    uniq_labels_unseen = np.unique(labels_unseen_)
    # Merge both splits for the GZSL harmonic scores
    combined_labels = np.concatenate((labels_seen_, labels_unseen_))
    combined_preds_adv = np.concatenate((adv_preds_seen, adv_preds_unseen))
    combined_preds_adv_defended = np.concatenate(
        (adv_preds_seen_defended, adv_preds_unseen_defended))
    if params["clean_results"]:
        preds_unseen = np.array(preds_unseen)
        combined_preds = np.concatenate((preds_seen, preds_unseen))
        seen, unseen, h = harmonic_score_gzsl(combined_preds, combined_labels,
                                              uniq_labels_seen, uniq_labels_unseen)
        print("GZSL Clean (s/u/h):", seen, unseen, h)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            preds_unseen_defended = np.array(preds_unseen_defended)
            combined_preds_defended = np.concatenate(
                (preds_seen_defended, preds_unseen_defended))
            seen, unseen, h = harmonic_score_gzsl(combined_preds_defended, combined_labels,
                                                  uniq_labels_seen, uniq_labels_unseen)
            print("GZSL Clean + defended (s/u/h):", seen, unseen, h)
    seen, unseen, h = harmonic_score_gzsl(combined_preds_adv, combined_labels,
                                          uniq_labels_seen, uniq_labels_unseen)
    print("GZSL Attacked (s/u/h):", seen, unseen, h)
    if params["hasDefense"] and params["defense"] != "label_smooth":
        seen, unseen, h = harmonic_score_gzsl(combined_preds_adv_defended, combined_labels,
                                              uniq_labels_seen, uniq_labels_unseen)
        print("GZSL Attacked + defended (s/u/h):", seen, unseen, h)
    print(end - start, "seconds passed for GZSL.")
# Tail of the Keras model definition: classification head for 10 classes
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create classifier wrapper
classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

# Craft adversarial samples with DeepFool
logger.info('Create DeepFool attack')
adv_crafter = DeepFool(classifier)
logger.info('Craft attack on training examples')
x_train_adv = adv_crafter.generate(x_train)
logger.info('Craft attack test examples')
x_test_adv = adv_crafter.generate(x_test)

# Evaluate the classifier on the adversarial samples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Classifier before adversarial training')
logger.info('Accuracy on adversarial samples: %.2f%%', (acc * 100))

# Data augmentation: expand the training set with the adversarial samples
x_train = np.append(x_train, x_train_adv, axis=0)
# Adversarial samples keep their original labels, so y_train is simply duplicated
y_train = np.append(y_train, y_train, axis=0)
    # Tail of a model-builder function whose `def` lies outside this view:
    # wrap the compiled Keras model for ART with pixel values clipped to [0, 1]
    classifier = KerasClassifier(model=model, clip_values=(0, 1))
    return classifier


# Get session
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k(x_train.shape[1:])
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool on the source model
adv_crafter = DeepFool(source)
x_train_adv = adv_crafter.generate(x_train)
x_test_adv = adv_crafter.generate(x_test)

# Construct and train a separate target network (transfer-attack setting)
target = cnn_mnist_tf(x_train.shape[1:])
target.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the CNN on the adversarial samples transferred from the source
preds = target.predict(x_test_adv)
acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax(y_test, axis=1))) / y_test.shape[0]
print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))