def test_2_tensorflow_iris(self):
    """
    Run HopSkipJump against the TensorFlow Iris classifier for all four
    combinations of (untargeted/targeted) x (norm=2 / norm=inf), checking
    that adversarial examples stay in [0, 1] and actually change predictions.
    """
    classifier, sess = get_tabular_classifier_tf()

    # Test untargeted attack and norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10, verbose=False)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

    # Test untargeted attack and norm=np.inf
    # NOTE: np.Inf was removed in NumPy 2.0; the canonical alias is np.inf.
    attack = HopSkipJump(
        classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10, norm=np.inf, verbose=False
    )
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

    # Test targeted attack and norm=2
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = HopSkipJump(classifier, targeted=True, max_iter=20, max_eval=100, init_eval=10, verbose=False)
    x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted HopSkipJump on Iris: %.2f%%", (acc * 100))

    # Test targeted attack and norm=np.inf
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = HopSkipJump(
        classifier, targeted=True, max_iter=20, max_eval=100, init_eval=10, norm=np.inf, verbose=False
    )
    x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted HopSkipJump on Iris: %.2f%%", (acc * 100))

    # Clean-up session
    if sess is not None:
        sess.close()
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST test data; move channels to the PyTorch NCHW layout
    (_, _), (x_test, y_test) = self.mnist
    x_test = np.swapaxes(x_test, 1, 3)

    # Targeted EAD: at least one sample should reach its random target
    attack = ElasticNet(classifier=ptc, targeted=True, max_iter=2)
    kwargs = {'y': random_targets(y_test, ptc.nb_classes)}
    adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1.0001).all())
    self.assertTrue((adv >= -0.0001).all())
    targets = np.argmax(kwargs['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets == adv_labels).any())

    # Untargeted EAD: at least one prediction should differ from the target
    attack = ElasticNet(classifier=ptc, targeted=False, max_iter=2)
    kwargs = {'y': random_targets(y_test, ptc.nb_classes)}
    adv = attack.generate(x_test, **kwargs)
    self.assertTrue((adv <= 1.0001).all())
    self.assertTrue((adv >= -0.0001).all())
    targets = np.argmax(kwargs['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets != adv_labels).any())
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.

    Trains a small CNN on an MNIST subset, then checks CarliniL2Method in
    targeted, untargeted-with-labels, and untargeted-without-labels modes.
    """
    # Initialize a tf session
    session = tf.Session()
    k.set_session(session)

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Get classifier
    krc = KerasClassifier((0, 1), model, use_logits=False)
    krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

    # First attack: targeted -- some sample should reach its random target
    cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=100, binary_search_steps=10,
                           learning_rate=2e-2, initial_const=3, decay=1e-2)
    params = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Second attack: untargeted with labels.
    # BUGFIX: the original asserted (target != y_pred_adv).all(), which fails
    # whenever a single prediction coincides with its *random* target by
    # chance; the sibling tests use .any() for the untargeted case.
    cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
                           learning_rate=2e-2, initial_const=3, decay=1e-2)
    params = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertTrue((target != y_pred_adv).any())

    # Third attack: untargeted without labels -- predictions should move
    cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
                           learning_rate=2e-2, initial_const=3, decay=1e-2)
    params = {}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    y_pred = np.argmax(krc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())
def test_random_targets(self):
    """random_targets must never draw a target equal to the true label."""
    labels = np.array([3, 1, 4, 1, 5, 9])
    one_hot = to_categorical(labels)

    # Index-label input
    drawn = random_targets(labels, 10)
    self.assertTrue(np.all(labels != drawn.argmax(axis=1)))

    # One-hot input
    drawn = random_targets(one_hot, 10)
    self.assertTrue(np.all(labels != drawn.argmax(axis=1)))
def test_tensorflow_iris(self):
    """
    Run HopSkipJump on the TensorFlow Iris classifier, covering untargeted
    and targeted attacks with both the L2 and Linf norms.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier, sess = get_iris_classifier_tf()

    # Test untargeted attack and norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

    # Test untargeted attack and norm=np.inf
    # NOTE: np.Inf was removed in NumPy 2.0; the canonical alias is np.inf.
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

    # Test targeted attack and norm=2
    targets = random_targets(y_test, nb_classes=3)
    attack = HopSkipJump(classifier, targeted=True, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(x_test, **{'y': targets})
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Success rate of targeted HopSkipJump on Iris: %.2f%%', (acc * 100))

    # Test targeted attack and norm=np.inf
    targets = random_targets(y_test, nb_classes=3)
    attack = HopSkipJump(classifier, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(x_test, **{'y': targets})
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Success rate of targeted HopSkipJump on Iris: %.2f%%', (acc * 100))

    # Clean-up session
    sess.close()
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    # Fresh TF session backing the Keras model
    session = tf.Session()
    k.set_session(session)

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Small CNN: conv -> pool -> flatten -> softmax head
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Wrap in an ART classifier and train briefly
    krc = KerasClassifier((0, 1), model, use_logits=False)
    krc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

    # Targeted EAD: at least one sample should hit its random target
    attack = ElasticNet(classifier=krc, targeted=True, max_iter=2)
    kwargs = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(kwargs['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    logger.debug('EAD Target: %s', target)
    logger.debug('EAD Actual: %s', y_pred_adv)
    logger.info('EAD Success Rate: %.2f', (sum(target == y_pred_adv) / float(len(target))))
    self.assertTrue((target == y_pred_adv).any())

    # Untargeted EAD: some prediction should differ from the random target
    attack = ElasticNet(classifier=krc, targeted=False, max_iter=2)
    kwargs = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(kwargs['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    logger.debug('EAD Target: %s', target)
    logger.debug('EAD Actual: %s', y_pred_adv)
    logger.info('EAD Success Rate: %.2f', (sum(target != y_pred_adv) / float(len(target))))
    self.assertTrue((target != y_pred_adv).any())

    # Kill Keras
    k.clear_session()
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Get MNIST and move channels to NCHW for PyTorch
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train = np.swapaxes(x_train, 1, 3)
    x_test = np.swapaxes(x_test, 1, 3)

    # Network, loss and optimizer
    model = Model()
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Wrap in an ART classifier and train
    ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), 10)
    ptc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

    # Targeted CW2: at least one sample should reach its random target
    attack = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
    kwargs = {'y': random_targets(y_test, ptc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(kwargs['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Untargeted CW2 with labels
    attack = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
    kwargs = {'y': random_targets(y_test, ptc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(kwargs['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target != y_pred_adv).any())

    # Untargeted CW2 without labels: predictions should move
    attack = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
    kwargs = {}
    x_test_adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())
def test_iris_tf(self):
    """ElasticNet (EAD) on the TF Iris classifier, untargeted then targeted."""
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_tf()

    # Untargeted attack: perturbed, bounded, and some predictions flip
    attack = ElasticNet(classifier, targeted=False, max_iter=10)
    adv = attack.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == adv_labels).all())
    acc = 1. - np.sum(adv_labels == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('EAD success rate on Iris: %.2f%%', (acc * 100))

    # Targeted attack: at least one sample reaches its random target
    targets = random_targets(y_test, nb_classes=3)
    attack = ElasticNet(classifier, targeted=True, max_iter=10)
    adv = attack.generate(x_test, **{'y': targets})
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    acc = np.sum(adv_labels == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Targeted EAD success rate on Iris: %.2f%%', (acc * 100))
def test_pytorch_iris(self):
    """FastGradientMethod on the PyTorch Iris classifier (untargeted + targeted)."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    # Untargeted FGM
    attack = FastGradientMethod(classifier, eps=0.1)
    adv = attack.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertLessEqual(np.amax(adv), 1.0)
    self.assertGreaterEqual(np.amin(adv), 0.0)
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == adv_labels).all())
    accuracy = np.sum(adv_labels == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (accuracy * 100))

    # Targeted FGM with random targets
    targets = random_targets(y_test, nb_classes=3)
    attack = FastGradientMethod(classifier, targeted=True, eps=0.1, batch_size=128)
    adv = attack.generate(x_test, **{'y': targets})
    self.assertFalse((x_test == adv).all())
    self.assertLessEqual(np.amax(adv), 1.0)
    self.assertGreaterEqual(np.amin(adv), 0.0)
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    accuracy = np.sum(adv_labels == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Success rate of targeted FGM on Iris: %.2f%%', (accuracy * 100))
def test_2_tensorflow_failure_attack(self):
    """
    Test the corner case when attack fails.
    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    # Zero iterations / zero learning rate: the attack cannot move the input
    attack = ElasticNet(
        classifier=tfc,
        targeted=True,
        max_iter=0,
        binary_search_steps=0,
        learning_rate=0,
        initial_const=1,
        verbose=False,
    )
    kwargs = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    x_test_adv = attack.generate(self.x_test_mnist, **kwargs)

    # Output stays bounded and (up to rounding) identical to the input
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    np.testing.assert_almost_equal(self.x_test_mnist, x_test_adv, 3)

    # Clean-up session
    if sess is not None:
        sess.close()
def test_failure_attack(self):
    """
    Test the corner case when attack is failed.
    :return:
    """
    # Build TFClassifier
    tfc, sess = get_classifier_tf()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # Degenerate CW2 configuration: no iterations, no search, zero step size
    attack = CarliniL2Method(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0,
                             learning_rate=0, initial_const=1)
    kwargs = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)

    # Still bounded, and effectively unchanged from the clean input
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    self.assertTrue(np.allclose(x_test, x_test_adv, atol=1e-3))

    # Clean-up session
    sess.close()
    tf.reset_default_graph()
def test_pytorch_iris(self):
    """BasicIterativeMethod on the PyTorch Iris classifier (untargeted + targeted)."""
    classifier = get_tabular_classifier_pt()

    # Untargeted BIM
    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())
    acc = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))

    # Targeted BIM with random targets
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128)
    adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    acc = np.sum(adv_labels == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted BIM on Iris: %.2f%%", (acc * 100))
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier; NCHW layout and float32 for PyTorch
    ptc = get_classifier_pt()
    x_test = np.swapaxes(self.x_test, 1, 3).astype(np.float32)

    # Targeted Boundary attack: some sample should reach its random target
    attack = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
    kwargs = {'y': random_targets(self.y_test, ptc.nb_classes())}
    adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1.0001).all())
    self.assertTrue((adv >= -0.0001).all())
    targets = np.argmax(kwargs['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets == adv_labels).any())

    # Untargeted Boundary attack: predictions should move off the clean ones
    attack = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
    adv = attack.generate(x_test)
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1.0001).all())
    self.assertTrue((adv >= -0.0001).all())
    clean_labels = np.argmax(ptc.predict(x_test), axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((clean_labels != adv_labels).any())
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.

    Runs ZooAttack in targeted and untargeted modes and checks the
    adversarial output stays within the (slightly padded) data bounds.
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST.
    # BUGFIX: self.mnist is ((x_train, y_train), (x_test, y_test)); the
    # original `x_test, y_test = self.mnist` assigned the whole train split
    # to x_test. Unpack the test split like the sibling tests do.
    (_, _), (x_test, y_test) = self.mnist
    x_test = np.swapaxes(x_test, 1, 3)

    # First attack: targeted ZOO
    zoo = ZooAttack(classifier=ptc, targeted=True, max_iter=10)
    params = {'y': random_targets(y_test, ptc.nb_classes)}
    x_test_adv = zoo.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    logger.debug('ZOO target: %s', target)
    logger.debug('ZOO actual: %s', y_pred_adv)
    logger.info('ZOO success rate on MNIST: %.2f', (sum(target != y_pred_adv) / float(len(target))))

    # Second attack: untargeted ZOO
    zoo = ZooAttack(classifier=ptc, targeted=False, max_iter=10)
    x_test_adv = zoo.generate(x_test)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    logger.debug('ZOO actual: %s', y_pred_adv)
    logger.info('ZOO success rate on MNIST: %.2f', (sum(y_pred != y_pred_adv) / float(len(y_pred))))
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST test split and move channels to NCHW
    (_, _), (x_test, y_test) = self.mnist
    x_test = np.swapaxes(x_test, 1, 3)

    # Targeted CW2
    attack = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
    kwargs = {'y': random_targets(y_test, ptc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(kwargs['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())
    logger.info('CW2 Success Rate: %.2f', (sum(target == y_pred_adv) / float(len(target))))

    # Untargeted CW2 (success measured against the previous random targets)
    attack = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
    x_test_adv = attack.generate(x_test)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(kwargs['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target != y_pred_adv).any())
    logger.info('CW2 Success Rate: %.2f', (sum(target != y_pred_adv) / float(len(target))))
def test_pytorch_iris_pt(self):
    """ProjectedGradientDescent on the PyTorch Iris classifier."""
    classifier = get_tabular_classifier_pt()

    # Untargeted PGD
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())
    acc = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (acc * 100))

    # Targeted PGD with random targets
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1, max_iter=5)
    adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    acc = np.sum(adv_labels == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted PGD on Iris: %.2f%%", (acc * 100))
def test_tensorflow_iris_LInf(self):
    """CarliniLInfMethod on the TF Iris classifier (untargeted + targeted)."""
    classifier, _ = get_tabular_classifier_tf()

    # Untargeted C&W-Linf
    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
    adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertLessEqual(np.amax(adv), 1.0)
    self.assertGreaterEqual(np.amin(adv), 0.0)
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())
    accuracy = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))

    # Targeted C&W-Linf with random targets
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
    adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertLessEqual(np.amax(adv), 1.0)
    self.assertGreaterEqual(np.amin(adv), 0.0)
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    accuracy = np.sum(adv_labels == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted C&W on Iris: %.2f%%", (accuracy * 100))
def test_pytorch_mnist_LInf(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Reshape MNIST to NCHW float32 for PyTorch
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # Targeted C&W-Linf: some sample should hit its random target
    attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    kwargs = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == adv).all())
    self.assertLessEqual(np.amax(adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(adv), -1e-6)
    targets = np.argmax(kwargs["y"], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets == adv_labels).any())

    # Untargeted C&W-Linf, compared against the previous random targets
    attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    adv = attack.generate(x_test)
    self.assertLessEqual(np.amax(adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(adv), -1e-6)
    targets = np.argmax(kwargs["y"], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets != adv_labels).any())
def test_pytorch_mnist_L2(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Reshape MNIST to NCHW float32; keep a pristine copy for the no-mutation check
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_original = x_test.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # Targeted CW2
    attack = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
    kwargs = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    adv = attack.generate(x_test, **kwargs)
    self.assertFalse((x_test == adv).all())
    self.assertLessEqual(np.amax(adv), 1.0)
    self.assertGreaterEqual(np.amin(adv), 0.0)
    targets = np.argmax(kwargs["y"], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets == adv_labels).any())
    logger.info("CW2 Success Rate: %.2f", (sum(targets == adv_labels) / float(len(targets))))

    # Untargeted CW2, compared against the previous random targets
    attack = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
    adv = attack.generate(x_test)
    self.assertLessEqual(np.amax(adv), 1.0)
    self.assertGreaterEqual(np.amin(adv), 0.0)
    targets = np.argmax(kwargs["y"], axis=1)
    adv_labels = np.argmax(ptc.predict(adv), axis=1)
    self.assertTrue((targets != adv_labels).any())
    logger.info("CW2 Success Rate: %.2f", (sum(targets != adv_labels) / float(len(targets))))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_failure_attack(self):
    """
    Test the corner case when attack fails.
    :return:
    """
    # Build TFClassifier
    tfc, sess = get_classifier_tf()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # Degenerate EAD configuration: zero iterations / steps / learning rate
    attack = ElasticNet(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0,
                        learning_rate=0, initial_const=1)
    kwargs = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = attack.generate(x_test, **kwargs)

    # Output stays bounded and essentially equals the clean input
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    np.testing.assert_almost_equal(x_test, x_test_adv, 3)

    # Kill TF
    sess.close()
    tf.reset_default_graph()
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    # Build KerasClassifier
    krc = get_classifier_kr()

    # Targeted Boundary attack
    attack = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
    kwargs = {'y': random_targets(self.y_test, krc.nb_classes())}
    adv = attack.generate(self.x_test, **kwargs)
    self.assertFalse((self.x_test == adv).all())
    self.assertTrue((adv <= 1.0001).all())
    self.assertTrue((adv >= -0.0001).all())
    targets = np.argmax(kwargs['y'], axis=1)
    adv_labels = np.argmax(krc.predict(adv), axis=1)
    self.assertTrue((targets == adv_labels).any())

    # Untargeted Boundary attack
    attack = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
    adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == adv).all())
    self.assertTrue((adv <= 1.0001).all())
    self.assertTrue((adv >= -0.0001).all())
    clean_labels = np.argmax(krc.predict(self.x_test), axis=1)
    adv_labels = np.argmax(krc.predict(adv), axis=1)
    self.assertTrue((clean_labels != adv_labels).any())

    # Clean-up session
    k.clear_session()
def test_tensorflow_failure_attack_L0(self):
    """
    Test the corner case when attack is failed.
    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    # Minimal-effort L0 attack; we only check it runs without raising.
    attack = CarliniL0Method(
        classifier=tfc,
        targeted=False,
        max_iter=1,
        batch_size=10,
        learning_rate=0.01,
        binary_search_steps=1,
        warm_start=True,
        verbose=False,
    )
    kwargs = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    _ = attack.generate(self.x_test_mnist, **kwargs)

    # NOTE(review): bound/closeness assertions on the output are currently
    # disabled for this attack:
    # self.assertLessEqual(np.amax(x_test_adv), 1.0)
    # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    # self.assertTrue(np.allclose(self.x_test_mnist, x_test_adv, atol=1e-3))

    # Clean-up session
    if sess is not None:
        sess.close()
def test_iris_tf(self):
    """Boundary attack on the TF Iris classifier (untargeted + targeted)."""
    classifier, _ = get_iris_classifier_tf()

    # Untargeted attack
    attack = BoundaryAttack(classifier, targeted=False, max_iter=10)
    adv = attack.generate(self.x_test)
    self.assertFalse((self.x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertFalse((np.argmax(self.y_test, axis=1) == adv_labels).all())
    accuracy = np.sum(adv_labels == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with boundary adversarial examples: %.2f%%', (accuracy * 100))

    # Targeted attack with random targets
    targets = random_targets(self.y_test, nb_classes=3)
    attack = BoundaryAttack(classifier, targeted=True, max_iter=10)
    adv = attack.generate(self.x_test, **{'y': targets})
    self.assertFalse((self.x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())
    adv_labels = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    accuracy = np.sum(adv_labels == np.argmax(targets, axis=1)) / self.y_test.shape[0]
    logger.info('Success rate of targeted boundary on Iris: %.2f%%', (accuracy * 100))
def test_tensorflow_failure_attack_L2(self):
    """
    Test the corner case when attack is failed.
    :return:
    """
    # Pristine copy to verify the fixture is never mutated
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    # Degenerate CW2 configuration: zero iterations / steps / learning rate
    attack = CarliniL2Method(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0,
                             learning_rate=0, initial_const=1)
    kwargs = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    x_test_adv = attack.generate(self.x_test_mnist, **kwargs)

    # Bounded output, essentially identical to the clean input
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    np.testing.assert_array_almost_equal(self.x_test_mnist, x_test_adv, decimal=3)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

    # Clean-up session
    if sess is not None:
        sess.close()
def test_krclassifier(self):
    """
    Test with a KerasClassifier.
    :return:
    """
    # Build KerasClassifier
    krc = get_classifier_kr()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # Baseline: targeted FGSM without EoT
    fgsm = FastGradientMethod(classifier=krc, targeted=True)
    kwargs = {'y': random_targets(y_test, krc.nb_classes())}
    adv_plain = fgsm.generate(x_test, **kwargs)

    # Same attack through an identity Expectation-over-Transformations
    # wrapper: with the identity transform the result must match.
    def identity(x):
        return x

    def transformation():
        while True:
            yield identity

    eot = ExpectationOverTransformations(classifier=krc, sample_size=1, transformation=transformation)
    fgsm_with_eot = FastGradientMethod(classifier=eot, targeted=True)
    adv_eot = fgsm_with_eot.generate(x_test, **kwargs)

    self.assertTrue((np.abs(adv_plain - adv_eot) < 0.001).all())
def test_tensorflow_iris(self):
    """Check ElasticNet (EAD) untargeted and targeted attacks on the TF Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_tf()

    # Untargeted attack: the first adversarial sample and all predictions are pinned.
    attack = ElasticNet(classifier, targeted=False, max_iter=10)
    x_adv = attack.generate(x_test)
    expected_first_sample = np.asarray([0.8479195, 0.42525578, 0.70166135, 0.28664514])
    np.testing.assert_array_almost_equal(x_adv[0, :], expected_first_sample, decimal=6)
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    expected_labels = np.asarray(
        [1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
         1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 0, 2, 2, 1, 2, 0, 2, 2, 1, 1, 2])
    np.testing.assert_array_equal(adv_labels, expected_labels)

    # Success rate = fraction of samples whose prediction no longer matches the truth.
    success_rate = 1.0 - np.sum(adv_labels == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('EAD success rate on Iris: %.2f%%', (success_rate * 100))

    # Targeted attack against randomly drawn target classes.
    targets = random_targets(y_test, nb_classes=3)
    attack = ElasticNet(classifier, targeted=True, max_iter=10)
    x_adv = attack.generate(x_test, y=targets)
    expected_first_sample = np.asarray([0.8859426, 0.51877, 0.5014498, 0.05447771])
    np.testing.assert_array_almost_equal(x_adv[0, :], expected_first_sample, decimal=6)
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    expected_labels = np.asarray(
        [0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2,
         2, 2, 0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2])
    np.testing.assert_array_equal(adv_labels, expected_labels)

    success_rate = np.sum(adv_labels == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Targeted EAD success rate on Iris: %.2f%%', (success_rate * 100))
def test_mnist(self):
    """Train a small CNN on MNIST and check that C&W L2 examples fool it."""
    session = tf.Session()
    k.set_session(session)

    comp_params = {
        "loss": 'categorical_crossentropy',
        "optimizer": 'adam',
        "metrics": ['accuracy'],
    }

    # Get MNIST and keep small train/test subsets so the test stays fast.
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]
    im_shape = x_train[0].shape

    # Build and briefly train the classifier.
    classifier = CNN(im_shape, act="relu")
    classifier.compile(comp_params)
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)
    scores = classifier.evaluate(x_test, y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Run an untargeted C&W L2 attack; predictions on the result must change.
    attack = CarliniL2Method(classifier, sess=session, targeted=False, max_iterations=100,
                             binary_search_steps=2, learning_rate=1e-2, initial_const=1)
    nb_classes = classifier.model.get_output_shape_at(-1)[-1]
    x_test_adv = attack.generate(x_test, y_val=random_targets(y_test, nb_classes))
    self.assertFalse((x_test == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == y_pred).all())

    scores = classifier.evaluate(x_test_adv, y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def test_iris_tf(self):
    """Check C&W L2, untargeted and targeted, on the TF Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_tf()

    # Untargeted: adversarial samples must stay in [0, 1] and differ from the originals.
    attack = CarliniL2Method(classifier, targeted=False, max_iter=10)
    x_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(y_test, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (acc * 100))

    # Targeted: at least one sample should land on its randomly chosen target class.
    targets = random_targets(y_test, nb_classes=3)
    attack = CarliniL2Method(classifier, targeted=True, max_iter=10)
    x_adv = attack.generate(x_test, y=targets)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    target_labels = np.argmax(targets, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())

    acc = np.sum(adv_labels == target_labels) / y_test.shape[0]
    logger.info('Success rate of targeted C&W on Iris: %.2f%%', (acc * 100))
def test_iris_pt(self):
    """Check BIM, untargeted and targeted, on the PyTorch Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    # Untargeted: adversarial samples must stay in [0, 1] and differ from the originals.
    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    x_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(y_test, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with BIM adversarial examples: %.2f%%', (acc * 100))

    # Targeted: at least one sample should land on its randomly chosen target class.
    targets = random_targets(y_test, nb_classes=3)
    attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128)
    x_adv = attack.generate(x_test, y=targets)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    target_labels = np.argmax(targets, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())

    acc = np.sum(adv_labels == target_labels) / y_test.shape[0]
    logger.info('Success rate of targeted BIM on Iris: %.2f%%', (acc * 100))
def test_iris_tf(self):
    """Check PGD, untargeted and targeted, on the TF Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_tf()

    # Untargeted: adversarial samples must stay in [0, 1] and differ from the originals.
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
    x_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(y_test, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (acc * 100))

    # Targeted: at least one sample should land on its randomly chosen target class.
    targets = random_targets(y_test, nb_classes=3)
    attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1)
    x_adv = attack.generate(x_test, y=targets)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    target_labels = np.argmax(targets, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())

    acc = np.sum(adv_labels == target_labels) / y_test.shape[0]
    logger.info('Success rate of targeted PGD on Iris: %.2f%%', (acc * 100))