def test_failure_attack(self):
    """
    Test the corner case when attack is failed.

    The attack is built with ``max_iter=0`` and ``learning_rate=0``. The test checks that the
    returned samples stay within [0, 1] (with a 1e-4 margin) and equal the inputs up to an
    absolute tolerance of 1e-3.

    :return: None
    """
    # Build TFClassifier
    tfc, sess = get_classifier_tf()

    try:
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # Failure attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=0, learning_rate=0, eps=0.5)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = clinfm.generate(x_test, **params)

        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        self.assertTrue(np.allclose(x_test, x_test_adv, atol=1e-3))
    finally:
        # Clean-up session. Done in `finally` so a failing assertion does not leave
        # the session open or the default graph populated for later tests.
        sess.close()
        tf.reset_default_graph()
def carlini_inf(x_test, model, eps, max_iter, learning_rate):
    """
    Craft a Carlini & Wagner L_inf adversarial sample for a Keras model.

    :param x_test: Input handed to the attack's ``generate`` method.
    :param model: Keras model; it is wrapped in a ``KerasClassifier`` with clip values (0, 1).
    :param eps: Forwarded to ``CarliniLInfMethod`` as ``eps``.
    :param max_iter: Forwarded to ``CarliniLInfMethod`` as ``max_iter``.
    :param learning_rate: Forwarded to ``CarliniLInfMethod`` as ``learning_rate``.
    :return: The attack output reshaped to ``(32, 32, 3)``.
    """
    wrapped_model = KerasClassifier(model=model, clip_values=(0, 1))
    attack = CarliniLInfMethod(
        classifier=wrapped_model,
        eps=eps,
        max_iter=max_iter,
        learning_rate=learning_rate,
    )
    adversarial = attack.generate(x_test)

    # The fixed target shape requires the attack output to hold exactly 32 * 32 * 3 values.
    return np.reshape(adversarial, (32, 32, 3))
def test_pytorch_iris_LInf(self):
    """
    Untargeted attack against the PyTorch tabular classifier on the Iris test data.

    Checks that the adversarial samples differ from the inputs, lie within [0, 1], and that
    not every prediction still equals the true label. The resulting accuracy is logged.

    :return: None
    """
    model_under_attack = get_tabular_classifier_pt()
    cw_attack = CarliniLInfMethod(model_under_attack, targeted=False, max_iter=10, eps=0.5)

    # The attack receives the Iris test features cast to float32.
    adversarial = cw_attack.generate(self.x_test_iris.astype(np.float32))

    samples_unchanged = (self.x_test_iris == adversarial).all()
    self.assertFalse(samples_unchanged)
    self.assertLessEqual(np.amax(adversarial), 1.0)
    self.assertGreaterEqual(np.amin(adversarial), 0.0)

    adv_labels = np.argmax(model_under_attack.predict(adversarial), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    n_samples = self.y_test_iris.shape[0]
    accuracy = np.sum(adv_labels == true_labels) / n_samples
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_iris_unbounded_LInf(self):
    """
    Untargeted attack against a Keras Iris classifier rebuilt without clip values.

    Checks that the adversarial samples differ from the inputs and that not every prediction
    still equals the true label. The resulting accuracy is logged.

    :return: None
    """
    base_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, reusing the underlying Keras model
    unbounded = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    cw_attack = CarliniLInfMethod(unbounded, targeted=False, max_iter=10, eps=1)
    adversarial = cw_attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == adversarial).all())

    adv_labels = np.argmax(unbounded.predict(adversarial), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    n_samples = self.y_test_iris.shape[0]
    accuracy = np.sum(adv_labels == true_labels) / n_samples
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def test_iris_pt(self):
    """
    Untargeted attack against the PyTorch Iris classifier.

    Checks that the adversarial samples differ from the inputs, lie within [0, 1], and that
    not every prediction still equals the true label. The resulting accuracy is logged.

    :return: None
    """
    (_, _), (features, one_hot_labels) = self.iris
    model_under_attack = get_iris_classifier_pt()

    cw_attack = CarliniLInfMethod(model_under_attack, targeted=False, max_iter=10, eps=0.5)
    adversarial = cw_attack.generate(features)

    self.assertFalse((features == adversarial).all())
    self.assertTrue((adversarial <= 1).all())
    self.assertTrue((adversarial >= 0).all())

    adv_labels = np.argmax(model_under_attack.predict(adversarial), axis=1)
    true_labels = np.argmax(one_hot_labels, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / one_hot_labels.shape[0]
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (acc * 100))
def test_iris_k_unbounded(self):
    """
    Untargeted attack against a Keras Iris classifier rebuilt without clip values.

    Checks that the adversarial samples differ from the inputs and that not every prediction
    still equals the true label. The resulting accuracy is logged.

    :return: None
    """
    (_, _), (features, one_hot_labels) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values, reusing the underlying Keras model
    unbounded = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    cw_attack = CarliniLInfMethod(unbounded, targeted=False, max_iter=10, eps=1)
    adversarial = cw_attack.generate(features)
    self.assertFalse((features == adversarial).all())

    adv_labels = np.argmax(unbounded.predict(adversarial), axis=1)
    true_labels = np.argmax(one_hot_labels, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / one_hot_labels.shape[0]
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (acc * 100))
def evaluate_cw(self, data_loader):
    """
    Measure accuracy of ``self.classifier`` on C&W L_inf adversarial samples.

    Only the first batch yielded by ``data_loader`` is evaluated. The perturbation budget is
    read from the ``'PGD'`` entry of ``attack_configs`` for ``self.dataset``.

    :param data_loader: Iterable yielding ``(examples, labels)`` batches whose items expose
        ``.cpu().numpy()``.
    :return: Fraction of adversarial samples in that batch classified as their given label.
    """
    epsilon = attack_configs['PGD'][self.dataset]['epsilon']
    attack = CarliniLInfMethod(self.classifier, targeted=False, eps=epsilon)

    batch_inputs, batch_labels = next(iter(data_loader))
    inputs_np = batch_inputs.cpu().numpy()
    labels_np = batch_labels.cpu().numpy()

    # Index the identity matrix by the integer labels to obtain their one-hot encoding.
    one_hot = np.eye(self.nb_classes)[labels_np]
    adversarial = attack.generate(inputs_np, y=one_hot)

    predicted = np.argmax(self.classifier.predict(adversarial), axis=1)
    return np.sum(predicted == labels_np) / labels_np.shape[0]
def test_failure_attack(self):
    """
    Test the corner case when attack is failed.

    A small convolutional TensorFlow model is built and fitted on the MNIST training split,
    then attacked with ``max_iter=0`` and ``learning_rate=0``. The test checks that the
    returned samples stay within [0, 1] (with a 1e-4 margin) and equal the inputs to
    3 decimal places.

    :return: None
    """
    # Build a TFClassifier
    # Define input and output placeholders
    input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    logits = tf.layers.dense(fc, 10)

    # Train operator
    loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train = optimizer.minimize(loss)

    # Tensorflow session and initialization
    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Train the classifier
        tfc = TFClassifier((0, 1), input_ph, logits, output_ph, train, loss, None, sess)
        tfc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

        # Failure attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=0, learning_rate=0, eps=0.5)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = clinfm.generate(x_test, **params)

        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        np.testing.assert_almost_equal(x_test, x_test_adv, 3)
    finally:
        # The session was previously never closed; release it whether or not the test passed.
        sess.close()
def general_test(model, optimizer, input_shape, nb_classes, test_loader, method, btrain=False,
                 model_file='last_model_92_sgd.pkl'):
    """
    Evaluate ``model`` on adversarial samples crafted batch by batch from ``test_loader``.

    Overall accuracy and per-class accuracy are printed. Per-class statistics are tracked for
    the 10 entries of the module-level ``_classes`` sequence.

    :param model: PyTorch module to evaluate; it is switched to eval mode.
    :param optimizer: Optimizer handed to ``PyTorchClassifier``.
    :param input_shape: Input shape handed to ``PyTorchClassifier``.
    :param nb_classes: Number of classes handed to ``PyTorchClassifier``.
    :param test_loader: Iterable of ``(images, labels)`` batches; batches may have any size.
    :param method: Attack name: 'Deepfool', 'BIM', 'JSMA', 'CW2' or 'CWI'.
    :param btrain: When false, weights are first loaded from ``model_file``.
    :param model_file: Path passed to ``torch.load`` when ``btrain`` is false.
    :return: ``correct / total`` over all evaluated samples.
    :raises NotImplementedError: If ``method`` is not a supported attack name.
    """
    global _classes

    # Factories are lazy, so the method name is validated before any weights are loaded
    # or any wrapper is built.
    attack_factories = {
        'Deepfool': lambda clf: DeepFool(clf),
        'BIM': lambda clf: BasicIterativeMethod(clf, batch_size=20),
        'JSMA': lambda clf: SaliencyMapMethod(clf, batch_size=20),
        'CW2': lambda clf: CarliniL2Method(clf, batch_size=20),
        'CWI': lambda clf: CarliniLInfMethod(clf, batch_size=20),
    }
    if method not in attack_factories:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes, clip_values=(.0, 1.))
    adv_crafter = attack_factories[method](warped_model)

    correct, total = 0, 0
    class_correct = [0.] * 10
    class_total = [0.] * 10
    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())
        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        hits = predicted == labels.data
        total += labels.size(0)
        correct += hits.sum()

        # Iterate over the actual batch size, not a fixed 20, so a shorter
        # (e.g. final) batch does not raise IndexError.
        for i in range(labels.size(0)):
            label = int(labels.data[i])
            class_correct[label] += float(hits[i])
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        if class_total[i]:
            print('Accuracy of %5s : %2d %%' % (_classes[i], 100 * class_correct[i] / class_total[i]))
        else:
            # A class with no samples previously caused a ZeroDivisionError.
            print('Accuracy of %5s : n/a (no samples)' % _classes[i])
    return correct / total
def test_keras_iris_clipped_LInf(self):
    """
    Untargeted attack against the Keras Iris classifier.

    Checks that the adversarial samples differ from the inputs, lie within [0, 1], and that
    not every prediction still equals the true label. The resulting accuracy is logged.

    :return: None
    """
    (_, _), (features, one_hot_labels) = self.iris
    model_under_attack = get_iris_classifier_kr()

    cw_attack = CarliniLInfMethod(model_under_attack, targeted=False, max_iter=10, eps=0.5)
    adversarial = cw_attack.generate(features)

    self.assertFalse((features == adversarial).all())
    self.assertLessEqual(np.amax(adversarial), 1.0)
    self.assertGreaterEqual(np.amin(adversarial), 0.0)

    adv_labels = np.argmax(model_under_attack.predict(adversarial), axis=1)
    true_labels = np.argmax(one_hot_labels, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / one_hot_labels.shape[0]
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (accuracy * 100))
def general_test_v2(model, optimizer, input_shape, nb_classes, test_loader, method, conf, btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    """
    Evaluate ``model`` on an adversarial dataset produced by ``adv_generalization``.

    The adversarial dataset is iterated in batches of 32 with ``drop_last=True``, so a
    trailing partial batch is not counted. Overall accuracy is printed.

    :param model: PyTorch module to evaluate; it is switched to eval mode.
    :param optimizer: Optimizer handed to ``PyTorchClassifier``.
    :param input_shape: Input shape handed to ``PyTorchClassifier``.
    :param nb_classes: Number of classes handed to ``PyTorchClassifier``.
    :param test_loader: Source loader handed to ``adv_generalization``.
    :param method: Attack name: 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI' or 'FGSM'.
    :param conf: Passed through to ``adv_generalization``.
    :param btrain: When false, weights are first loaded from the ``'state_dict'`` entry of
        the checkpoint at ``model_file``.
    :param model_file: Path passed to ``torch.load`` when ``btrain`` is false.
    :return: ``correct / total`` over all evaluated samples.
    :raises NotImplementedError: If ``method`` is not a supported attack name.
    """
    # Factories are lazy, so the method name is validated before any weights are loaded
    # or any wrapper is built.
    attack_factories = {
        'Deepfool': lambda clf: DeepFool(clf),
        'BIM': lambda clf: BasicIterativeMethod(clf, batch_size=32),
        'JSMA': lambda clf: SaliencyMapMethod(clf, batch_size=32),
        'CW2': lambda clf: CarliniL2Method(clf, batch_size=32),
        'CWI': lambda clf: CarliniLInfMethod(clf, batch_size=32),
        'FGSM': lambda clf: FastGradientMethod(clf, batch_size=32),
    }
    if method not in attack_factories:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes, clip_values=(.0, 1.))
    adv_crafter = attack_factories[method](warped_model)

    correct, total = 0, 0
    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset, batch_size=32, shuffle=False, drop_last=True)
    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
def test_tensorflow_failure_attack_LInf(self):
    """
    Test the corner case when attack is failed.

    The attack is built with ``max_iter=0`` and ``learning_rate=0``. The test checks that the
    returned samples stay within [0, 1] and equal the inputs up to an absolute tolerance
    of 1e-3.

    :return: None
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        # Failure attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=0, learning_rate=0, eps=0.5)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes())}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)

        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        self.assertTrue(np.allclose(self.x_test_mnist, x_test_adv, atol=1e-3))
    finally:
        # Clean-up session. Done in `finally` so a failing assertion does not leak it.
        if sess is not None:
            sess.close()
def test_pytorch_mnist_LInf(self):
    """
    Third test with the PyTorchClassifier.

    Runs a targeted and then an untargeted attack on MNIST test data reshaped to
    ``(N, 1, 28, 28)`` float32. Outputs are checked to lie within [0, 1] (1e-6 margin).

    :return: None
    """
    n_samples = self.x_test_mnist.shape[0]
    images = np.reshape(self.x_test_mnist, (n_samples, 1, 28, 28)).astype(np.float32)

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # First attack: targeted, at least one sample must reach its target class
    targeted_attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes())}
    adversarial = targeted_attack.generate(images, **params)
    self.assertFalse((images == adversarial).all())
    self.assertLessEqual(np.amax(adversarial), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(adversarial), -1e-6)
    target_classes = np.argmax(params["y"], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((target_classes == adv_classes).any())

    # Second attack: untargeted, compared against the same random targets as above
    untargeted_attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    adversarial = untargeted_attack.generate(images)
    self.assertLessEqual(np.amax(adversarial), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(adversarial), -1e-6)
    target_classes = np.argmax(params["y"], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((target_classes != adv_classes).any())
def test_tensorflow_iris_LInf(self):
    """
    Untargeted and targeted attacks against the TensorFlow tabular (Iris) classifier.

    Both runs check that the adversarial samples differ from the inputs and lie within
    [0, 1]. The untargeted run requires at least one changed prediction; the targeted run
    requires at least one sample to reach its target class. Both rates are logged.

    :return: None
    """
    classifier, _ = get_tabular_classifier_tf()
    features = self.x_test_iris
    n_samples = self.y_test_iris.shape[0]

    # Test untargeted attack
    untargeted = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
    adversarial = untargeted.generate(features)
    self.assertFalse((features == adversarial).all())
    self.assertLessEqual(np.amax(adversarial), 1.0)
    self.assertGreaterEqual(np.amin(adversarial), 0.0)

    adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / n_samples
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    targeted = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
    adversarial = targeted.generate(features, **{"y": targets})
    self.assertFalse((features == adversarial).all())
    self.assertLessEqual(np.amax(adversarial), 1.0)
    self.assertGreaterEqual(np.amin(adversarial), 0.0)

    adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
    target_labels = np.argmax(targets, axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    accuracy = np.sum(adv_labels == target_labels) / n_samples
    logger.info("Success rate of targeted C&W on Iris: %.2f%%", (accuracy * 100))
def test_iris_tf(self):
    """
    Untargeted and targeted attacks against the TensorFlow Iris classifier.

    Both runs check that the adversarial samples differ from the inputs and lie within
    [0, 1]. The untargeted run requires at least one changed prediction; the targeted run
    requires at least one sample to reach its target class. Both rates are logged.

    :return: None
    """
    (_, _), (features, one_hot_labels) = self.iris
    classifier, _ = get_iris_classifier_tf()
    n_samples = one_hot_labels.shape[0]

    # Test untargeted attack
    untargeted = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
    adversarial = untargeted.generate(features)
    self.assertFalse((features == adversarial).all())
    self.assertTrue((adversarial <= 1).all())
    self.assertTrue((adversarial >= 0).all())

    adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
    true_labels = np.argmax(one_hot_labels, axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    acc = np.sum(adv_labels == true_labels) / n_samples
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (acc * 100))

    # Test targeted attack
    targets = random_targets(one_hot_labels, nb_classes=3)
    targeted = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
    adversarial = targeted.generate(features, **{'y': targets})
    self.assertFalse((features == adversarial).all())
    self.assertTrue((adversarial <= 1).all())
    self.assertTrue((adversarial >= 0).all())

    adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
    target_labels = np.argmax(targets, axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    acc = np.sum(adv_labels == target_labels) / n_samples
    logger.info('Success rate of targeted C&W on Iris: %.2f%%', (acc * 100))
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.

    Runs a targeted and then an untargeted attack on MNIST test data whose axes 1 and 3
    have been swapped. Outputs are checked to lie within [0, 1] (1e-4 margin).

    :return: None
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST and swap axes 1 and 3 before feeding the classifier
    (_, _), (images, labels) = self.mnist
    images = np.swapaxes(images, 1, 3)

    # First attack: targeted, at least one sample must reach its target class
    targeted_attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    params = {'y': random_targets(labels, ptc.nb_classes)}
    adversarial = targeted_attack.generate(images, **params)
    self.assertFalse((images == adversarial).all())
    self.assertTrue((adversarial <= 1.0001).all())
    self.assertTrue((adversarial >= -0.0001).all())
    target_classes = np.argmax(params['y'], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((target_classes == adv_classes).any())

    # Second attack: untargeted, compared against the same random targets as above
    untargeted_attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    adversarial = untargeted_attack.generate(images)
    self.assertTrue((adversarial <= 1.0001).all())
    self.assertTrue((adversarial >= -0.0001).all())
    target_classes = np.argmax(params['y'], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((target_classes != adv_classes).any())
def test_classifier_type_check_fail_classifier_LInf(self):
    """
    Constructing the attack with an object lacking the classifier API must raise TypeError.

    The exception text is checked to contain the expected explanatory message.

    :return: None
    """
    # Use a useless test classifier to test basic classifier properties
    class ClassifierNoAPI:
        pass

    # The class object itself (not an instance) is handed to the attack.
    not_a_classifier = ClassifierNoAPI
    expected_message = (
        'For `CarliniLInfMethod` classifier must be an instance of '
        '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
        '(<class \'object\'>,).'
    )

    with self.assertRaises(TypeError) as context:
        _ = CarliniLInfMethod(classifier=not_a_classifier)

    self.assertIn(expected_message, str(context.exception))
def test_scikitlearn_LInf(self):
    """
    Untargeted and targeted attacks against scikit-learn classifiers fitted on the Iris test data.

    Each model class in the test-case mapping is wrapped, fitted and attacked. Both runs
    check that the adversarial samples differ from the inputs and lie within [0, 1].

    :return: None
    """
    from sklearn.linear_model import LogisticRegression
    from art.classifiers.scikitlearn import ScikitlearnLogisticRegression

    # Only logistic regression is active; the SVC / LinearSVC -> ScikitlearnSVC cases are disabled.
    scikitlearn_test_cases = {LogisticRegression: ScikitlearnLogisticRegression}

    (_, _), (features, one_hot_labels) = self.iris
    n_samples = one_hot_labels.shape[0]

    for sk_model_cls, art_wrapper_cls in scikitlearn_test_cases.items():
        classifier = art_wrapper_cls(model=sk_model_cls(), clip_values=(0, 1))
        classifier.fit(x=features, y=one_hot_labels)

        # Test untargeted attack
        untargeted = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
        adversarial = untargeted.generate(features)
        self.assertFalse((features == adversarial).all())
        self.assertLessEqual(np.amax(adversarial), 1.0)
        self.assertGreaterEqual(np.amin(adversarial), 0.0)

        adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
        true_labels = np.argmax(one_hot_labels, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / n_samples
        logger.info(
            'Accuracy of ' + classifier.__class__.__name__ + ' on Iris with C&W adversarial examples: '
            '%.2f%%', (accuracy * 100))

        # Test targeted attack
        targets = random_targets(one_hot_labels, nb_classes=3)
        targeted = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
        adversarial = targeted.generate(features, **{'y': targets})
        self.assertFalse((features == adversarial).all())
        self.assertLessEqual(np.amax(adversarial), 1.0)
        self.assertGreaterEqual(np.amin(adversarial), 0.0)

        adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == adv_labels).any())
        accuracy = np.sum(adv_labels == target_labels) / n_samples
        logger.info(
            'Success rate of ' + classifier.__class__.__name__ + ' on targeted C&W on Iris: %.2f%%',
            (accuracy * 100))
def test_classifier_type_check_fail_gradients_LInf(self):
    """
    Constructing the attack with a decision-tree classifier wrapper must raise TypeError.

    The exception text is checked to contain the expected explanatory message.

    :return: None
    """
    # Use a test classifier not providing gradients required by white-box attack
    from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
    from sklearn.tree import DecisionTreeClassifier

    tree_classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
    expected_message = (
        'For `CarliniLInfMethod` classifier must be an instance of '
        '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
        '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).'
    )

    with self.assertRaises(TypeError) as context:
        _ = CarliniLInfMethod(classifier=tree_classifier)

    self.assertIn(expected_message, str(context.exception))
def test_scikitlearn_LInf(self):
    """
    Untargeted and targeted attacks against scikit-learn models fitted on the Iris test data.

    Logistic regression, SVC and LinearSVC are each wrapped in ``SklearnClassifier``, fitted
    and attacked. Both runs check that the adversarial samples differ from the inputs and
    lie within [0, 1]. The Iris test features are also verified to be unmodified.

    :return: None
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import SklearnClassifier

    scikitlearn_test_cases = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    # Snapshot of the inputs, used to detect in-place modification.
    x_test_original = self.x_test_iris.copy()
    n_samples = self.y_test_iris.shape[0]

    for sk_model in scikitlearn_test_cases:
        classifier = SklearnClassifier(model=sk_model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        # Test untargeted attack
        untargeted = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
        adversarial = untargeted.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == adversarial).all())
        self.assertLessEqual(np.amax(adversarial), 1.0)
        self.assertGreaterEqual(np.amin(adversarial), 0.0)

        adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
        true_labels = np.argmax(self.y_test_iris, axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / n_samples
        logger.info(
            "Accuracy of " + classifier.__class__.__name__ + " on Iris with C&W adversarial examples: "
            "%.2f%%",
            (accuracy * 100),
        )

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        targeted = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
        adversarial = targeted.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == adversarial).all())
        self.assertLessEqual(np.amax(adversarial), 1.0)
        self.assertGreaterEqual(np.amin(adversarial), 0.0)

        adv_labels = np.argmax(classifier.predict(adversarial), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == adv_labels).any())
        accuracy = np.sum(adv_labels == target_labels) / n_samples
        logger.info(
            "Success rate of " + classifier.__class__.__name__ + " on targeted C&W on Iris: %.2f%%",
            (accuracy * 100),
        )

        # Check that x_test has not been modified by attack and classifier
        max_drift = float(np.max(np.abs(x_test_original - self.x_test_iris)))
        self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_tfclassifier(self):
    """
    First test with the TFClassifier.

    Runs a targeted attack and then an untargeted attack with ``batch_size=1`` on the MNIST
    test data. Outputs are checked to lie within [0, 1] (1e-4 margin) and success rates
    are logged.

    :return: None
    """
    # Build TFClassifier
    tfc, sess = get_classifier_tf()

    try:
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # First attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=10, eps=0.5)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = clinfm.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack, no batching
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=False, max_iter=10, eps=0.5, batch_size=1)
        x_test_adv = clinfm.generate(x_test)
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        # Compared against the random targets drawn for the first attack.
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up session. Done in `finally` so a failing assertion does not leave
        # the session open or the default graph populated for later tests.
        sess.close()
        tf.reset_default_graph()
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.

    Runs a targeted and then an untargeted attack on the MNIST test data. Outputs are
    checked to lie within [0, 1] (1e-4 margin) and success rates are logged.

    :return: None
    """
    # Build KerasClassifier
    # NOTE(review): despite the test name, the classifier is obtained from
    # get_classifier_tf() — confirm whether a Keras-specific getter was intended.
    krc, sess = get_classifier_tf()

    try:
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # First attack
        clinfm = CarliniLInfMethod(classifier=krc, targeted=True, max_iter=10, eps=0.5)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = clinfm.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        clinfm = CarliniLInfMethod(classifier=krc, targeted=False, max_iter=10, eps=0.5)
        x_test_adv = clinfm.generate(x_test)
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        # Compared against the random targets drawn for the first attack.
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up. Done in `finally` so a failing assertion does not skip it; the session
        # returned by the getter was previously never closed.
        if sess is not None:
            sess.close()
        k.clear_session()
def test_tensorflow_mnist_LInf(self):
    """
    First test with the TensorFlowClassifier.

    Runs a targeted attack and then an untargeted attack with ``batch_size=1`` on the MNIST
    test data. Outputs are checked against the [0, 1] range and success rates are logged.

    :return: None
    """
    (_, _), (x_test, y_test) = self.mnist

    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf(from_logits=True)

    try:
        # First attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=10, eps=0.5)
        params = {'y': random_targets(y_test, tfc.nb_classes())}
        x_test_adv = clinfm.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack, no batching
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=False, max_iter=10, eps=0.5, batch_size=1)
        x_test_adv = clinfm.generate(x_test)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        # Compared against the random targets drawn for the first attack.
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up session. Done in `finally` so a failing assertion does not leak it.
        sess.close()
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    """
    Build an adversarial attack object for a PyTorch model.

    The model is switched to eval mode and wrapped in a ``PyTorchClassifier`` before the
    attack is selected by name.

    :param model: PyTorch module to attack.
    :param optimizer: Optimizer handed to ``PyTorchClassifier``.
    :param loss: Loss handed to ``PyTorchClassifier``.
    :param input_shape: Input shape handed to ``PyTorchClassifier``.
    :param nb_class: Number of classes handed to ``PyTorchClassifier``.
    :param method: One of 'deepfool', 'bim', 'jsma', 'cw2', 'cwi', 'fgsm', 'pgd'.
    :param batch_size: Batch size for every attack except 'deepfool'.
    :param pgd_eps: ``eps`` used only by the 'pgd' attack.
    :return: The constructed attack instance.
    :raises NotImplementedError: If ``method`` is not one of the supported names.
    """
    model.eval()
    wrapped = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    if method == 'deepfool':
        return DeepFool(wrapped)
    if method == 'bim':
        return BasicIterativeMethod(wrapped, batch_size=batch_size)
    if method == 'jsma':
        return SaliencyMapMethod(wrapped, batch_size=batch_size)
    if method == 'cw2':
        return CarliniL2Method(wrapped, batch_size=batch_size)
    if method == 'cwi':
        return CarliniLInfMethod(wrapped, batch_size=batch_size)
    if method == 'fgsm':
        return FastGradientMethod(wrapped, batch_size=batch_size)
    if method == 'pgd':
        return ProjectedGradientDescent(wrapped, batch_size=batch_size, eps=pgd_eps)

    raise NotImplementedError('Unsupported Attack Method: {}'.format(method))
def test_keras_mnist_LInf(self):
    """
    Second test with the KerasClassifier.

    Runs a targeted and then an untargeted attack on the MNIST test data. Outputs are
    checked to lie within [0, 1] (1e-6 margin) and success rates are logged.

    :return: None
    """
    # Build KerasClassifier
    krc = get_image_classifier_kr(from_logits=True)

    try:
        # First attack
        clinfm = CarliniLInfMethod(classifier=krc, targeted=True, max_iter=10, eps=0.5)
        params = {"y": random_targets(self.y_test_mnist, krc.nb_classes())}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.000001)
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        clinfm = CarliniLInfMethod(classifier=krc, targeted=False, max_iter=10, eps=0.5)
        x_test_adv = clinfm.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.000001)
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        # Compared against the random targets drawn for the first attack.
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up. Done in `finally` so a failing assertion does not leave Keras
        # backend state behind for later tests.
        k.clear_session()
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.

    Runs a targeted and then an untargeted attack on test data whose axes 1 and 3 have been
    swapped and which is cast to float32. Outputs are checked to lie within [0, 1]
    (1e-6 margin).

    :return: None
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    images = np.swapaxes(self.x_test, 1, 3).astype(np.float32)

    # First attack: targeted, at least one sample must reach its target class
    targeted_attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    params = {'y': random_targets(self.y_test, ptc.nb_classes())}
    adversarial = targeted_attack.generate(images, **params)
    self.assertFalse((images == adversarial).all())
    self.assertLessEqual(np.amax(adversarial), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(adversarial), -1e-6)
    target_classes = np.argmax(params['y'], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((target_classes == adv_classes).any())

    # Second attack: untargeted, compared against the same random targets as above
    untargeted_attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    adversarial = untargeted_attack.generate(images)
    self.assertLessEqual(np.amax(adversarial), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(adversarial), -1e-6)
    target_classes = np.argmax(params['y'], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((target_classes != adv_classes).any())
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.

    Fits a fresh ``Model`` on the MNIST training split (axes 1 and 3 swapped), then runs
    three attacks: targeted, untargeted with labels supplied, and untargeted without
    labels. Outputs are checked to lie within [0, 1] (1e-4 margin).

    :return: None
    """
    # Get MNIST and swap axes 1 and 3 of both splits
    (train_images, train_labels), (images, labels) = self.mnist
    train_images = np.swapaxes(train_images, 1, 3)
    images = np.swapaxes(images, 1, 3)

    # Define the network
    network = Model()

    # Define a loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    adam = optim.Adam(network.parameters(), lr=0.01)

    # Get classifier
    ptc = PyTorchClassifier((0, 1), network, criterion, adam, (1, 28, 28), 10)
    ptc.fit(train_images, train_labels, batch_size=BATCH_SIZE, nb_epochs=10)

    # First attack: targeted, at least one sample must reach its target class
    attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    params = {'y': random_targets(labels, ptc.nb_classes)}
    adversarial = attack.generate(images, **params)
    self.assertFalse((images == adversarial).all())
    self.assertTrue((adversarial <= 1.0001).all())
    self.assertTrue((adversarial >= -0.0001).all())
    given_classes = np.argmax(params['y'], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((given_classes == adv_classes).any())

    # Second attack: untargeted, with freshly drawn random labels passed as `y`
    attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    params = {'y': random_targets(labels, ptc.nb_classes)}
    adversarial = attack.generate(images, **params)
    self.assertTrue((adversarial <= 1.0001).all())
    self.assertTrue((adversarial >= -0.0001).all())
    given_classes = np.argmax(params['y'], axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((given_classes != adv_classes).any())

    # Third attack: untargeted, no labels supplied; compared with clean predictions
    attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    params = {}
    adversarial = attack.generate(images, **params)
    self.assertFalse((images == adversarial).all())
    self.assertTrue((adversarial <= 1.0001).all())
    self.assertTrue((adversarial >= -0.0001).all())
    clean_classes = np.argmax(ptc.predict(images), axis=1)
    adv_classes = np.argmax(ptc.predict(adversarial), axis=1)
    self.assertTrue((clean_classes != adv_classes).any())
def test_tfclassifier(self):
    """
    First test with the TFClassifier.

    Builds and fits a small convolutional TensorFlow model on the MNIST training split, then
    runs three attacks (targeted, untargeted with labels supplied, untargeted without
    labels). The three attacks are run first with the attack's default batch size and then
    with ``batch_size=1``. Outputs are checked to lie within [0, 1] (1e-4 margin) and
    success rates are logged.

    :return: None
    """
    # Build a TFClassifier
    # Define input and output placeholders
    input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    logits = tf.layers.dense(fc, 10)

    # Train operator
    loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train = optimizer.minimize(loss)

    # Tensorflow session and initialization
    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Train the classifier
        tfc = TFClassifier((0, 1), input_ph, logits, output_ph, train, loss, None, sess)
        tfc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

        # Same three scenarios twice, in the original order: default batching first,
        # then without batching (batch_size=1).
        for attack_kwargs in ({}, {'batch_size': 1}):
            # First attack
            clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=10, eps=0.5, **attack_kwargs)
            params = {'y': random_targets(y_test, tfc.nb_classes)}
            x_test_adv = clinfm.generate(x_test, **params)
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1.0001).all())
            self.assertTrue((x_test_adv >= -0.0001).all())
            target = np.argmax(params['y'], axis=1)
            y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
            logger.debug('CW0 Target: %s', target)
            logger.debug('CW0 Actual: %s', y_pred_adv)
            logger.info('CW0 Success Rate: %.2f', (np.sum(target == y_pred_adv) / float(len(target))))
            self.assertTrue((target == y_pred_adv).any())

            # Second attack
            clinfm = CarliniLInfMethod(classifier=tfc, targeted=False, max_iter=10, eps=0.5, **attack_kwargs)
            params = {'y': random_targets(y_test, tfc.nb_classes)}
            x_test_adv = clinfm.generate(x_test, **params)
            self.assertTrue((x_test_adv <= 1.0001).all())
            self.assertTrue((x_test_adv >= -0.0001).all())
            target = np.argmax(params['y'], axis=1)
            y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
            logger.debug('CW0 Target: %s', target)
            logger.debug('CW0 Actual: %s', y_pred_adv)
            logger.info('CW0 Success Rate: %.2f', (np.sum(target != y_pred_adv) / float(len(target))))
            self.assertTrue((target != y_pred_adv).any())

            # Third attack
            clinfm = CarliniLInfMethod(classifier=tfc, targeted=False, max_iter=10, eps=0.5, **attack_kwargs)
            params = {}
            x_test_adv = clinfm.generate(x_test, **params)
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1.0001).all())
            self.assertTrue((x_test_adv >= -0.0001).all())
            y_pred = np.argmax(tfc.predict(x_test), axis=1)
            y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
            logger.debug('CW0 Target: %s', y_pred)
            logger.debug('CW0 Actual: %s', y_pred_adv)
            logger.info('CW0 Success Rate: %.2f', (np.sum(y_pred != y_pred_adv) / float(len(y_pred))))
            self.assertTrue((y_pred != y_pred_adv).any())
    finally:
        # The session was previously never closed; release it whether or not the test passed.
        sess.close()
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.

    Trains a small CNN on the MNIST data held in ``self.mnist`` and runs three
    Carlini & Wagner L_inf attacks against it: a targeted attack, an untargeted
    attack given explicit labels, and an untargeted attack without labels.
    The TensorFlow session created for the test is always released, even when
    an assertion fails.
    :return:
    """
    # Initialize a tf session
    session = tf.Session()
    k.set_session(session)

    try:
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Create simple CNN
        model = Sequential()
        model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(10, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Get classifier
        krc = KerasClassifier((0, 1), model, use_logits=False)
        krc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

        # First attack: targeted, at least one sample must reach its target class
        clinfm = CarliniLInfMethod(classifier=krc, targeted=True, max_iter=10, eps=0.5)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = clinfm.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        # Adversarial samples must stay inside the clip range (small tolerance)
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack: untargeted with labels, at least one prediction must
        # differ from the supplied label
        clinfm = CarliniLInfMethod(classifier=krc, targeted=False, max_iter=10, eps=0.5)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = clinfm.generate(x_test, **params)
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', target)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())

        # Third attack: untargeted without labels, compared against the
        # classifier's own predictions on the clean samples
        clinfm = CarliniLInfMethod(classifier=krc, targeted=False, max_iter=10, eps=0.5)
        params = {}
        x_test_adv = clinfm.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        y_pred = np.argmax(krc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug('CW0 Target: %s', y_pred)
        logger.debug('CW0 Actual: %s', y_pred_adv)
        logger.info('CW0 Success Rate: %.2f', (sum(y_pred != y_pred_adv) / float(len(y_pred))))
        self.assertTrue((y_pred != y_pred_adv).any())
    finally:
        # Clean-up session: close it before resetting the graph so no session
        # is still active when the backend state is cleared
        session.close()
        k.clear_session()
def get_adversarial(targeted, attack_name, classifier, xs, target_ys, batch_size, dataset, fgsm_epsilon=0,
                    cwl2_confidence=0):
    """
    Craft adversarial samples for ``xs`` with the named attack, one batch at a time.

    :param targeted: whether the attack is targeted; when true, the matching slice of
        ``target_ys`` is passed to ``generate`` as ``y``.
    :param attack_name: one of 'FastGradientMethod', 'CarliniLInfMethod',
        'UniversalPerturbation' or 'ProjectedGradientDescent'.
    :param classifier: classifier handed to the attack constructor.
    :param xs: input samples; batches are taken along the first axis.
    :param target_ys: target labels aligned with ``xs``; only read when ``targeted`` is true.
    :param batch_size: number of samples per ``generate`` call; also forwarded to the attacks
        that accept a ``batch_size`` argument.
    :param dataset: dataset name; 'mnist' selects a different ProjectedGradientDescent setup.
    :param fgsm_epsilon: ``eps`` used for FastGradientMethod.
    :param cwl2_confidence: not used by this function; kept so existing callers keep working.
    :return: array with the adversarial samples of all batches concatenated along axis 0.
    :raises ValueError: if ``batch_size`` is smaller than 1 or ``attack_name`` is unknown.
    :raises SystemExit: if a targeted 'UniversalPerturbation' attack is requested.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer, got %r' % (batch_size,))

    # The attack
    if attack_name == 'FastGradientMethod':
        attack = FastGradientMethod(classifier=classifier, targeted=targeted, eps=fgsm_epsilon,
                                    batch_size=batch_size)
    elif attack_name == 'CarliniLInfMethod':
        attack = CarliniLInfMethod(classifier=classifier, max_iter=1000, targeted=targeted,
                                   batch_size=batch_size)
    elif attack_name == 'UniversalPerturbation':
        if targeted:
            print('UniversalPerturbation attack cannot be targeted.')
            # Same effect as the interactive ``exit()`` helper, without relying on ``site``
            raise SystemExit
        attack = UniversalPerturbation(classifier=classifier, max_iter=5)
    elif attack_name == 'ProjectedGradientDescent':
        if dataset == 'mnist':
            attack = ProjectedGradientDescent(classifier=classifier, targeted=targeted, norm=1, eps=.3,
                                              eps_step=0.01, num_random_init=0, max_iter=40,
                                              batch_size=batch_size)
        else:
            attack = ProjectedGradientDescent(classifier=classifier, targeted=targeted, norm=1, eps=8.0,
                                              eps_step=2.0, num_random_init=0, max_iter=7,
                                              batch_size=batch_size)
    else:
        # Fail early with a clear message instead of an AttributeError further down
        raise ValueError('Unknown attack name: %r' % (attack_name,))

    # Generate the adversarial samples in steps
    samples_range = xs.shape[0]
    batches = []
    # max(..., 1) keeps a single generate() call on empty input, as before
    for start in range(0, max(samples_range, 1), batch_size):
        stop = start + batch_size
        # Progress output is limited to the intermediate batches, as it always was
        if 0 < start < samples_range - batch_size:
            print(start)
        if targeted:
            adv_samples = attack.generate(xs[start:stop], y=target_ys[start:stop])
        else:
            adv_samples = attack.generate(xs[start:stop])
        batches.append(adv_samples)

    # Single concatenation avoids re-copying the accumulated result for every batch
    return np.concatenate(batches, axis=0)