def test_4_pytorch_iris(self):
    """
    Run UniversalPerturbation with the "ead" attacker on the Iris test set using the classifier returned by
    `get_tabular_classifier_pt`.

    Checks that the generated samples differ from the inputs, stay within [0, 1], and that not every prediction
    on them matches the true label. The resulting accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()

    # Settings of the inner attacker are nested inside the settings of the universal attack.
    inner_attack_params = {"max_iter": 5, "targeted": False, "verbose": False}
    attack_params = {
        "max_iter": 1,
        "attacker": "ead",
        "attacker_params": inner_attack_params,
    }
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(**attack_params)

    x_test_iris_adv = attack.generate(self.x_test_iris)

    # The attack must have changed something and kept the values in the [0, 1] range.
    samples_unchanged = (self.x_test_iris == x_test_iris_adv).all()
    self.assertFalse(samples_unchanged)
    self.assertTrue((x_test_iris_adv <= 1).all())
    self.assertTrue((x_test_iris_adv >= 0).all())

    # At least one adversarial prediction has to differ from the true label.
    labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((labels == preds_adv).all())

    n_samples = self.y_test_iris.shape[0]
    acc = np.sum(preds_adv == labels) / n_samples
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (acc * 100))
def test_7_keras_iris_unbounded(self):
    """
    Run UniversalPerturbation with the "newtonfool" attacker on the Iris test set using a KerasClassifier that
    is rebuilt without clip values.

    Checks that the generated samples differ from the inputs and that not every prediction on them matches the
    true label. No range check is made on the adversarial samples. The resulting accuracy is logged.

    :return:
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Wrap the same underlying model again, this time without passing clip values.
    classifier = KerasClassifier(model=clipped_classifier._model, use_logits=False, channels_first=True)

    # Settings of the inner attacker are nested inside the settings of the universal attack.
    inner_attack_params = {"max_iter": 5, "verbose": False}
    attack_params = {
        "max_iter": 1,
        "attacker": "newtonfool",
        "attacker_params": inner_attack_params,
    }
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(**attack_params)

    x_test_iris_adv = attack.generate(self.x_test_iris)

    # The attack must have changed something.
    samples_unchanged = (self.x_test_iris == x_test_iris_adv).all()
    self.assertFalse(samples_unchanged)

    # At least one adversarial prediction has to differ from the true label.
    labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((labels == preds_adv).all())

    n_samples = self.y_test_iris.shape[0]
    acc = np.sum(preds_adv == labels) / n_samples
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (acc * 100))
def test_keras_mnist(self):
    """
    UniversalPerturbation with the "ead" attacker on MNIST using the KerasClassifier.

    The perturbation is generated on the training data; the learned noise is then added to the test data.
    Checks the fooling rate (when the attack converged), that predictions on both perturbed sets do not all
    match the labels, and that the original test data is left untouched.

    :return:
    """
    # Snapshot of the test data to detect in-place modification later on.
    x_test_original = self.x_test_mnist.copy()

    # Build KerasClassifier
    classifier = get_image_classifier_kr()

    # Attack
    inner_attack_params = {"max_iter": 2, "targeted": False}
    attack = UniversalPerturbation(classifier, max_iter=1, attacker="ead", attacker_params=inner_attack_params)
    x_train_adv = attack.generate(self.x_train_mnist)

    # A fooling rate below 0.2 is only acceptable when the attack did not converge.
    fooled_enough = attack.fooling_rate >= 0.2
    self.assertTrue(fooled_enough or not attack.converged)

    # Apply the learned universal noise to the test samples.
    x_test_adv = self.x_test_mnist + attack.noise
    test_unchanged = (self.x_test_mnist == x_test_adv).all()
    self.assertFalse(test_unchanged)

    train_y_pred = np.argmax(classifier.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(classifier.predict(x_test_adv), axis=1)
    test_labels = np.argmax(self.y_test_mnist, axis=1)
    self.assertFalse((test_labels == test_y_pred).all())
    train_labels = np.argmax(self.y_train_mnist, axis=1)
    self.assertFalse((train_labels == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(x_test_original - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_tensorflow_mnist(self):
    """
    UniversalPerturbation with the "newtonfool" attacker on MNIST using the TensorFlowClassifier.

    The perturbation is generated on the training data; the learned noise is then added to the test data.
    Checks the fooling rate (when the attack converged), that predictions on both perturbed sets do not all
    match the labels, and that the original test data is left untouched.

    :return:
    """
    # Snapshot of the test data to detect in-place modification later on.
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier; the second returned value is not used by this test.
    classifier, _session = get_image_classifier_tf()

    # Attack
    inner_attack_params = {"max_iter": 5}
    attack = UniversalPerturbation(
        classifier, max_iter=1, attacker="newtonfool", attacker_params=inner_attack_params
    )
    x_train_adv = attack.generate(self.x_train_mnist)

    # A fooling rate below 0.2 is only acceptable when the attack did not converge.
    fooled_enough = attack.fooling_rate >= 0.2
    self.assertTrue(fooled_enough or not attack.converged)

    # Apply the learned universal noise to the test samples.
    x_test_adv = self.x_test_mnist + attack.noise
    test_unchanged = (self.x_test_mnist == x_test_adv).all()
    self.assertFalse(test_unchanged)

    train_y_pred = np.argmax(classifier.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(classifier.predict(x_test_adv), axis=1)
    test_labels = np.argmax(self.y_test_mnist, axis=1)
    self.assertFalse((test_labels == test_y_pred).all())
    train_labels = np.argmax(self.y_train_mnist, axis=1)
    self.assertFalse((train_labels == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(x_test_original - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_pytorch_mnist(self):
    """
    UniversalPerturbation with the "newtonfool" attacker on MNIST using the PyTorchClassifier.

    The perturbation is generated on axis-swapped float32 copies of the training data; the learned noise is then
    added to the equally converted test data. Checks the fooling rate (when the attack converged), that
    predictions on both perturbed sets do not all match the labels, and that the converted test data is left
    untouched.

    :return:
    """
    # Swap axes 1 and 3 and cast to float32 for the PyTorch model.
    # NOTE(review): presumably this moves the channel axis to the front - confirm against the fixture layout.
    x_train_mnist = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)

    # Snapshot of the converted test data to detect in-place modification later on.
    x_test_original = x_test_mnist.copy()

    # Build PyTorchClassifier
    classifier = get_image_classifier_pt()

    # Attack
    inner_attack_params = {"max_iter": 5}
    attack = UniversalPerturbation(
        classifier, max_iter=1, attacker="newtonfool", attacker_params=inner_attack_params
    )
    x_train_mnist_adv = attack.generate(x_train_mnist)

    # A fooling rate below 0.2 is only acceptable when the attack did not converge.
    fooled_enough = attack.fooling_rate >= 0.2
    self.assertTrue(fooled_enough or not attack.converged)

    # Apply the learned universal noise to the test samples.
    x_test_mnist_adv = x_test_mnist + attack.noise
    test_unchanged = (x_test_mnist == x_test_mnist_adv).all()
    self.assertFalse(test_unchanged)

    train_y_pred = np.argmax(classifier.predict(x_train_mnist_adv), axis=1)
    test_y_pred = np.argmax(classifier.predict(x_test_mnist_adv), axis=1)
    test_labels = np.argmax(self.y_test_mnist, axis=1)
    self.assertFalse((test_labels == test_y_pred).all())
    train_labels = np.argmax(self.y_train_mnist, axis=1)
    self.assertFalse((train_labels == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(x_test_original - x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_6_keras_iris_clipped(self):
    """
    Run UniversalPerturbation with the "newtonfool" attacker on the Iris test set using the classifier returned
    by `get_tabular_classifier_kr`.

    Checks that the generated samples differ from the inputs, stay within [0, 1], and that not every prediction
    on them matches the true label. The resulting accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_kr()

    # Test untargeted attack
    inner_attack_params = {"max_iter": 5}
    attack_params = {
        "max_iter": 1,
        "attacker": "newtonfool",
        "attacker_params": inner_attack_params,
    }
    attack = UniversalPerturbation(classifier)
    attack.set_params(**attack_params)

    x_test_iris_adv = attack.generate(self.x_test_iris)

    # The attack must have changed something and kept the values in the [0, 1] range.
    samples_unchanged = (self.x_test_iris == x_test_iris_adv).all()
    self.assertFalse(samples_unchanged)
    self.assertTrue((x_test_iris_adv <= 1).all())
    self.assertTrue((x_test_iris_adv >= 0).all())

    # At least one adversarial prediction has to differ from the true label.
    labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((labels == preds_adv).all())

    n_samples = self.y_test_iris.shape[0]
    acc = np.sum(preds_adv == labels) / n_samples
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (acc * 100))
def test_check_params(self):
    """
    Check that UniversalPerturbation raises a ValueError for each invalid constructor argument.

    Covered arguments: negative `delta`, `max_iter`, `eps` and `batch_size`, and a string for `verbose`.

    :return:
    """
    classifier = get_image_classifier_pt(from_logits=True)

    # One invalid keyword argument per constructor call, checked in this order.
    invalid_kwargs = (
        {"delta": -1},
        {"max_iter": -1},
        {"eps": -1},
        {"batch_size": -1},
        {"verbose": "False"},
    )
    for kwargs in invalid_kwargs:
        with self.assertRaises(ValueError):
            UniversalPerturbation(classifier, **kwargs)