def test_pytorch_mnist(self):
    x_train = np.reshape(self.x_train_mnist, (self.x_train_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_original = x_test.copy()

    # Create basic PyTorch model
    classifier = get_image_classifier_pt(from_logits=True)

    scores = get_labels_np_array(classifier.predict(x_train))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", accuracy * 100)

    scores = get_labels_np_array(classifier.predict(x_test))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on test set: %.2f%%", accuracy * 100)

    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", accuracy * 100)

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", accuracy * 100)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def atk_DeepFool(x_train, x_test, y_train, y_test, classifier):
    adv_crafter = DeepFool(classifier, max_iter=20)
    x_train_adv = adv_crafter.generate(x_train)
    x_test_adv = adv_crafter.generate(x_test)
    print("After DeepFool Attack \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv
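The `evaluate` helper called above is not included in the snippet; a minimal sketch of what it plausibly does, assuming it simply reports clean and adversarial accuracy (hypothetical implementation, not the original):

def evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier):
    # Hypothetical sketch: report accuracy on the clean and adversarial splits
    for name, x, y in [('train', x_train, y_train), ('test', x_test, y_test),
                       ('adv train', x_train_adv, y_train), ('adv test', x_test_adv, y_test)]:
        preds = np.argmax(classifier.predict(x), axis=1)
        acc = np.sum(preds == np.argmax(y, axis=1)) / y.shape[0]
        print('Accuracy on %s examples: %.2f%%' % (name, acc * 100))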
def test_tensorflow_mnist(self):
    x_test_original = self.x_test_mnist.copy()

    # Create basic CNN on MNIST using TensorFlow
    classifier, sess = get_image_classifier_tf(from_logits=True)

    scores = get_labels_np_array(classifier.predict(self.x_train_mnist))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("[TF, MNIST] Accuracy on training set: %.2f%%", accuracy * 100)

    scores = get_labels_np_array(classifier.predict(self.x_test_mnist))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("[TF, MNIST] Accuracy on test set: %.2f%%", accuracy * 100)

    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train_mnist)
    x_test_adv = attack.generate(self.x_test_mnist)

    self.assertFalse((self.x_train_mnist == x_train_adv).all())
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", accuracy * 100)

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", accuracy * 100)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the
    attack are two CNNs on MNIST trained for 2 epochs. FGSM and DeepFool both generate the attack images on the
    same source classifier. The test checks whether accuracy on adversarial samples increases after adversarially
    training the model.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Get source and target classifiers
    classifier_tgt = self.classifier_k
    classifier_src = self.classifier_tf

    # Create FGSM and DeepFool attackers
    adv1 = FastGradientMethod(classifier_src)
    adv2 = DeepFool(classifier_src)
    x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))
    preds = classifier_tgt.predict(x_adv)
    acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

    # Perform adversarial training
    adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
    params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
    adv_trainer.fit(x_train, y_train, **params)

    # Evaluate that accuracy on adversarial samples has improved
    preds_adv_trained = adv_trainer.classifier.predict(x_adv)
    acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]
    logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
    logger.info('Accuracy after adversarial training: %.2f%%', acc_adv_trained * 100)
def test_keras_mnist(self):
    x_test_original = self.x_test_mnist.copy()

    # Keras classifier
    classifier = get_image_classifier_kr(from_logits=True)

    scores = classifier._model.evaluate(self.x_train_mnist, self.y_train_mnist)
    logger.info("[Keras, MNIST] Accuracy on training set: %.2f%%", scores[1] * 100)
    scores = classifier._model.evaluate(self.x_test_mnist, self.y_test_mnist)
    logger.info("[Keras, MNIST] Accuracy on test set: %.2f%%", scores[1] * 100)

    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train_mnist)
    x_test_adv = attack.generate(self.x_test_mnist)

    self.assertFalse((self.x_train_mnist == x_train_adv).all())
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", accuracy * 100)

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", accuracy * 100)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
def test_multi_attack_mnist_with_generator(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the
    attack are two CNNs on MNIST trained for 2 epochs. FGSM and DeepFool both generate the attack images on the
    same source classifier. The test checks whether accuracy on adversarial samples increases after adversarially
    training the model. Here a generator is used to yield the data for adversarial training.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_original = x_train.copy()

    class MyDataGenerator(DataGenerator):
        def __init__(self, x, y, size, batch_size):
            self.x = x
            self.y = y
            self.size = size
            self.batch_size = batch_size

        def get_batch(self):
            ids = np.random.choice(self.size, size=min(self.size, self.batch_size), replace=False)
            return self.x[ids], self.y[ids]

    generator = MyDataGenerator(x_train, y_train, x_train.shape[0], BATCH_SIZE)

    # Get source and target classifiers
    classifier_tgt = self.classifier_k
    classifier_src = self.classifier_tf

    # Create FGSM and DeepFool attackers
    adv1 = FastGradientMethod(classifier_src)
    adv2 = DeepFool(classifier_src)
    x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))
    preds = classifier_tgt.predict(x_adv)
    acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

    # Perform adversarial training
    adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
    params = {'nb_epochs': 2}
    adv_trainer.fit_generator(generator, **params)

    # Evaluate that accuracy on adversarial samples has improved
    preds_adv_trained = adv_trainer.classifier.predict(x_adv)
    acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]
    logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
    logger.info('Accuracy after adversarial training: %.2f%%', acc_adv_trained * 100)

    # Finally, assert that the original training data has not changed
    self.assertTrue((x_train == x_train_original).all())
def general_test(model, optimizer, input_shape, nb_classes, test_loader, method, btrain=False,
                 model_file='last_model_92_sgd.pkl'):
    global _classes
    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
    wrapped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes, clip_values=(.0, 1.))

    if method == 'Deepfool':
        adv_crafter = DeepFool(wrapped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(wrapped_model, batch_size=20)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(wrapped_model, batch_size=20)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(wrapped_model, batch_size=20)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(wrapped_model, batch_size=20)
    else:
        raise ValueError('Unknown attack method: %s' % method)

    correct, total = 0, 0
    class_correct = list(0. for _ in range(10))
    class_total = list(0. for _ in range(10))

    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())
        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        c = (predicted == labels.data).squeeze()
        # Assumes a fixed batch size of 20; a shorter final batch would raise an IndexError here
        for i in range(20):
            label = labels.data[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (_classes[i], 100 * class_correct[i] / class_total[i]))
    return float(correct) / total
def deep_fool(x_test, model, max_iter, epsilon, nb_grads, batch_size):
    classifier = KerasClassifier(model=model, clip_values=(0, 1))
    attack_df = DeepFool(classifier=classifier, max_iter=max_iter, epsilon=epsilon,
                         nb_grads=nb_grads, batch_size=batch_size)
    x_test_adv = attack_df.generate(x_test)
    # The reshape assumes a single 32x32 RGB input image (e.g. CIFAR-10)
    return np.reshape(x_test_adv, (32, 32, 3))
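A hedged usage sketch for the helper above; `build_cifar_model` and the CIFAR-10 test array `x_test` are assumptions for illustration, not part of the original:

# Hypothetical usage: craft a DeepFool example for one CIFAR-10 image.
# `build_cifar_model()` is an assumed factory returning a trained Keras model.
model = build_cifar_model()
x = x_test[:1].astype(np.float32) / 255.0  # one image, scaled into the (0, 1) clip range
x_adv = deep_fool(x, model, max_iter=10, epsilon=1e-6, nb_grads=10, batch_size=1)
print(x_adv.shape)  # (32, 32, 3)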
def test_partial_grads(self):
    attack = DeepFool(self.classifier_k, max_iter=2, nb_grads=3)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == x_test_adv).all())

    test_y_pred = get_labels_np_array(self.classifier_k.predict(x_test_adv))
    self.assertFalse((self.y_test == test_y_pred).all())

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', accuracy * 100)
def _test_backend_mnist(self, classifier):
    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    x_train_adv = attack.generate(self.x_train)
    x_test_adv = attack.generate(self.x_test)

    self.assertFalse((self.x_train == x_train_adv).all())
    self.assertFalse((self.x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((self.y_train == train_y_pred).all())
    self.assertFalse((self.y_test == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train, axis=1)) / self.y_train.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', accuracy * 100)

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', accuracy * 100)
def test_iris_k_unbounded(self):
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack = DeepFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == x_test_adv).all())

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test, axis=1) == predictions_adv).all())
    accuracy = np.sum(predictions_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', accuracy * 100)
def test_iris_pt(self):
    classifier = get_iris_classifier_pt()

    attack = DeepFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test, axis=1) == predictions_adv).all())
    accuracy = np.sum(predictions_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', accuracy * 100)
def test_keras_mnist_partial_grads(self):
    classifier = get_image_classifier_kr(from_logits=True)
    attack = DeepFool(classifier, max_iter=2, nb_grads=3)
    x_test_adv = attack.generate(self.x_test_mnist)
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", accuracy * 100)
def test_transfer(self):
    (x_train, y_train), (x_test, y_test) = self.mnist

    attack = DeepFool(self.classifier_tf)
    x_test_adv = attack.generate(x_test)
    preds = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
    acc = np.sum(preds == np.argmax(y_test, axis=1)) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attack)
    adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

    preds_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
    acc_new = np.sum(preds_new == np.argmax(y_test, axis=1)) / NB_TEST
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
    logger.info('Accuracy after adversarial training: %.2f%%', acc_new * 100)
def test_keras_iris_clipped(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_kr()

    attack = DeepFool(classifier, max_iter=5)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == predictions_adv).all())
    accuracy = np.sum(predictions_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', accuracy * 100)
class DeepFoolAttack(AdversarialAttack):
    def __init__(self, model, overshoot=1e-6, max_iterations=100, n_candidates=10, batch_size=16):
        super().__init__(model=model)
        self._overshoot = overshoot
        self._max_iterations = max_iterations
        self._n_candidates = n_candidates
        self._method = DeepFool(classifier=self.model, epsilon=self._overshoot,
                                max_iter=self._max_iterations, nb_grads=self._n_candidates,
                                batch_size=batch_size)

    def attack_method(self, x, y=None):
        return self._method.generate(x=x)
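A minimal usage sketch for this wrapper, assuming `classifier` is an ART-wrapped model and `x_test` a batch of inputs; both are assumptions, neither appears in the snippet:

# Hypothetical usage of the DeepFoolAttack wrapper above
attack = DeepFoolAttack(model=classifier, overshoot=1e-6, max_iterations=50, n_candidates=10)
x_test_adv = attack.attack_method(x_test)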
def test_transfer(self):
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()

    attack = DeepFool(self.classifier_tf)
    x_test_adv = attack.generate(x_test)
    preds = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
    acc = np.sum(preds == np.argmax(y_test, axis=1)) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_k, attack)
    adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

    preds_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
    acc_new = np.sum(preds_new == np.argmax(y_test, axis=1)) / NB_TEST
    self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
    logger.info('Accuracy after adversarial training: %.2f%%', acc_new * 100)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
# FGSM
attack = FastGradientMethod(classifier=classifier, eps=epsilon, eps_step=epsilon / 10)
x_test_adv = attack.generate(x=x_test / 255.0)
predictions = classifier.predict(x_test_adv * 255.0)
accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

# DeepFool
adv_crafter_deepfool = DeepFool(classifier, batch_size=batch_size, epsilon=epsilon)
x_test_adv = adv_crafter_deepfool.generate(x=x_test / 255.0)
predictions = classifier.predict(x_test_adv * 255.0)
print(np.argmax(predictions, axis=1))
accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

# PGD with 20 iterations
adv_crafter_pgd_20 = ProjectedGradientDescent(classifier, eps=epsilon, eps_step=0.00775,
                                              max_iter=20, batch_size=batch_size)
x_test_adv = adv_crafter_pgd_20.generate(x=x_test / 255.0)
predictions = classifier.predict(x_test_adv * 255.0)
def deepfool(clf, x_train, x_test):
    from art.attacks import DeepFool

    adv_crafter = DeepFool(clf)
    x_train_deepfool_adv = adv_crafter.generate(x_train)
    x_test_deepfool_adv = adv_crafter.generate(x_test)
    return x_train_deepfool_adv, x_test_deepfool_adv
def Deep_Fool(victims):
    # Relies on a module-level `model` (an ART classifier) defined elsewhere
    adv_crafter = DeepFool(model)
    final_victims = adv_crafter.generate(x=victims)
    return final_victims
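The module-level `model` that `Deep_Fool` relies on is not defined in the snippet; a hedged setup sketch, assuming the older `art.classifiers` import style used elsewhere in this section and an already-trained Keras network `keras_model` (both assumptions):

from art.classifiers import KerasClassifier

# Hypothetical module-level classifier assumed by Deep_Fool above
model = KerasClassifier(model=keras_model, clip_values=(0.0, 1.0))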
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create classifier wrapper
classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

# Craft adversarial samples with DeepFool
logger.info('Create DeepFool attack')
adv_crafter = DeepFool(classifier)
logger.info('Craft attack on training examples')
x_train_adv = adv_crafter.generate(x_train)
logger.info('Craft attack on test examples')
x_test_adv = adv_crafter.generate(x_test)

# Evaluate the classifier on the adversarial samples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Classifier before adversarial training')
logger.info('Accuracy on adversarial samples: %.2f%%', acc * 100)

# Data augmentation: expand the training set with the adversarial samples
# (the adversarial examples keep the labels of the clean samples they were crafted from)
x_train = np.append(x_train, x_train_adv, axis=0)
y_train = np.append(y_train, y_train, axis=0)

# Retrain the CNN on the extended dataset
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Create the classifier wrapper
classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

# epoch: one epoch trains on every sample in the training set once
# batch size: with SGD-style training, each step draws `batchsize` samples from the training set
# iteration: one iteration trains on `batchsize` samples once
# e.g. with 1000 training samples and batchsize=10, one full pass takes 100 iterations = 1 epoch

# Craft adversarial samples with DeepFool
logger.info('Create DeepFool attack')
adv_crafter = DeepFool(classifier)  # build an adversarial crafter against the classifier
logger.info('Craft attack on training examples')
x_train_adv = adv_crafter.generate(x_train)  # turn the images into adversarial examples
logger.info('Craft attack on testing examples')
x_test_adv = adv_crafter.generate(x_test)

# Evaluate the classifier on the adversarial samples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('---Before adversarial training---')
logger.info('Classifier before adversarial training')
logger.info('Accuracy on adversarial samples: %.2f%%', acc * 100)

# Data augmentation: extend the training set with the adversarial samples (5000 + 5000 = 10000)
# axis=0 appends along rows; axis=1 would append along columns
x_train = np.append(x_train, x_train_adv, axis=0)
y_train = np.append(y_train, y_train, axis=0)