def test_iris_unbounded(self):
    """
    Run FGM (eps=1) through an ExpectationOverTransformations wrapper.

    The wrapped classifier is rebuilt from the Iris model without clip
    values; the test asserts that the adversarial samples differ from the
    inputs, leave the [0, 1] range on both sides, and that not every
    prediction remains correct.
    """
    def identity(x):
        return x

    def transformation():
        # Endless supply of the identity transformation.
        while True:
            yield identity

    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)
    eot = ExpectationOverTransformations(unclipped, sample_size=1, transformation=transformation)

    x_adv = FastGradientMethod(eot, eps=1).generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv > 1).any())
    self.assertTrue((x_adv < 0).any())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(eot.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
def test_iris_k_clipped(self):
    """
    Run an untargeted UniversalPerturbation attack (NewtonFool inner
    attacker) on the Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    _, (x_test, y_test) = self.iris
    iris_classifier, _ = get_iris_classifier_kr()

    # Test untargeted attack
    universal = UniversalPerturbation(iris_classifier)
    universal.set_params(max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
    x_adv = universal.generate(x_test)

    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_clipped(self):
    """
    Run untargeted FGM (eps=0.1) through an ExpectationOverTransformations
    wrapper around the Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    def identity(x):
        return x

    def transformation():
        # Endless supply of the identity transformation.
        while True:
            yield identity

    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()
    eot = ExpectationOverTransformations(base_classifier, sample_size=1, transformation=transformation)

    # Test untargeted attack
    x_adv = FastGradientMethod(eot, eps=.1).generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(eot.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
def test_iris_unbounded(self):
    """
    Attack a RandomizedSmoothing wrapper with FGM (eps=1).

    The smoothed classifier wraps an Iris classifier rebuilt without clip
    values. The test asserts that the adversarial samples differ from the
    inputs, leave the [0, 1] range on both sides, and that not every
    smoothed prediction on them is correct. Accuracy and coverage (as
    returned by compute_accuracy) are logged for clean and adversarial
    inputs.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    krc = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    rs = RandomizedSmoothing(classifier=krc, sample_size=100, scale=0.01, alpha=0.001)

    attack = FastGradientMethod(rs, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    # Predict once per input set and reuse the result for both the
    # assertion and the logged metrics.
    pred_clean = rs.predict(x_test)
    pred_adv = rs.predict(x_test_adv)

    preds_smooth = np.argmax(pred_adv, axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

    acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
    acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)

    # Each label is paired with the metrics of the data it names: the
    # "adversarial examples" messages report the adversarial predictions.
    logger.info('Accuracy on Iris with smoothing on adversarial examples: %.2f%%', (acc_adv * 100))
    logger.info('Coverage on Iris with smoothing on adversarial examples: %.2f%%', (cov_adv * 100))
    logger.info('Accuracy on Iris with smoothing: %.2f%%', (acc_clean * 100))
    logger.info('Coverage on Iris with smoothing: %.2f%%', (cov_clean * 100))
def test_iris_k_unbounded(self):
    """
    Run UniversalPerturbation (NewtonFool inner attacker) on an Iris
    classifier rebuilt without clip values.

    Asserts that the adversarial samples differ from the inputs and that
    not every prediction remains correct.
    """
    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    universal = UniversalPerturbation(unclipped)
    universal.set_params(max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
    x_adv = universal.generate(x_test)
    self.assertFalse((x_test == x_adv).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(unclipped.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_k_clipped(self):
    """
    Run untargeted and targeted ProjectedGradientDescent on the Iris
    classifier.

    For both attacks the adversarial samples must differ from the inputs
    and stay within [0, 1]. The untargeted run must change at least one
    prediction; the targeted run must hit at least one random target.
    """
    _, (x_test, y_test) = self.iris
    iris_classifier, _ = get_iris_classifier_kr()
    n_samples = y_test.shape[0]

    # Test untargeted attack
    pgd = ProjectedGradientDescent(iris_classifier, eps=1, eps_step=0.1)
    x_adv = pgd.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())
    accuracy = np.sum(y_adv == y_true) / n_samples
    logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (accuracy * 100))

    # Test targeted attack
    targets = random_targets(y_test, nb_classes=3)
    pgd = ProjectedGradientDescent(iris_classifier, targeted=True, eps=1, eps_step=0.1)
    x_adv = pgd.generate(x_test, y=targets)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_target = np.argmax(targets, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertTrue((y_target == y_adv).any())
    success_rate = np.sum(y_adv == y_target) / n_samples
    logger.info('Success rate of targeted PGD on Iris: %.2f%%', (success_rate * 100))
def test_iris_k_clipped(self):
    """
    Run untargeted and targeted FastGradientMethod (eps=0.1) on the Iris
    classifier, using self.x_test / self.y_test.

    For both attacks the adversarial samples must differ from the inputs
    and stay within [0, 1]. The untargeted run must change at least one
    prediction; the targeted run must hit at least one random target.
    """
    iris_classifier, _ = get_iris_classifier_kr()
    n_samples = self.y_test.shape[0]

    # Test untargeted attack
    fgm = FastGradientMethod(iris_classifier, eps=.1)
    x_adv = fgm.generate(self.x_test)
    self.assertFalse((self.x_test == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    y_true = np.argmax(self.y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())
    correct_rate = np.sum(y_adv == y_true) / n_samples
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (correct_rate * 100))

    # Test targeted attack
    targets = random_targets(self.y_test, nb_classes=3)
    fgm = FastGradientMethod(iris_classifier, targeted=True, eps=.1)
    x_adv = fgm.generate(self.x_test, y=targets)
    self.assertFalse((self.x_test == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    y_target = np.argmax(targets, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertTrue((y_target == y_adv).any())
    hit_rate = np.sum(y_adv == y_target) / n_samples
    logger.info('Success rate of targeted FGM on Iris: %.2f%%', (hit_rate * 100))
def test_iris_clipped(self):
    """
    Exercise RandomizedSmoothing on the Iris classifier.

    Adversarial samples are generated with FGM (eps=0.1) against the base
    classifier and must stay within [0, 1]. The smoothed classifier must
    not classify all of them correctly. Accuracy and coverage (as returned
    by compute_accuracy) are logged for clean and adversarial inputs. The
    test then checks the shapes of predict / loss_gradient /
    class_gradient outputs and the results of certify.
    """
    (_, _), (x_test, y_test) = self.iris
    krc, _ = get_iris_classifier_kr()
    rs = RandomizedSmoothing(classifier=krc, sample_size=100, scale=0.01, alpha=0.001)

    # Test untargeted attack
    attack = FastGradientMethod(krc, eps=.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    # Predict once per input set and reuse the result for both the
    # assertion and the logged metrics.
    pred_clean = rs.predict(x_test)
    pred_adv = rs.predict(x_test_adv)

    preds_smooth = np.argmax(pred_adv, axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

    acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
    acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)

    # Each label is paired with the metrics of the data it names: the
    # "adversarial examples" messages report the adversarial predictions.
    logger.info('Accuracy on Iris with smoothing on adversarial examples: %.2f%%', (acc_adv * 100))
    logger.info('Coverage on Iris with smoothing on adversarial examples: %.2f%%', (cov_adv * 100))
    logger.info('Accuracy on Iris with smoothing: %.2f%%', (acc_clean * 100))
    logger.info('Coverage on Iris with smoothing: %.2f%%', (cov_clean * 100))

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

    # check gradients
    grad_smooth1 = rs.loss_gradient(x=x_test, y=y_test)
    grad_smooth2 = rs.class_gradient(x=x_test, label=None)
    grad_smooth3 = rs.class_gradient(x=x_test, label=np.argmax(y_test, axis=1))
    self.assertEqual(grad_smooth1.shape, x_test_adv.shape)
    self.assertEqual(grad_smooth2.shape[0], len(x_test))
    self.assertEqual(grad_smooth3.shape[0], len(x_test))

    # check certification
    certified_pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(certified_pred), len(x_test))
    self.assertEqual(len(radius), len(x_test))
    self.assertTrue((radius <= 1).all())
    self.assertTrue((certified_pred < y_test.shape[1]).all())
def test_iris_k_clipped(self):
    """
    Run an untargeted CarliniLInfMethod attack (eps=0.5, 10 iterations) on
    the Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    _, (x_test, y_test) = self.iris
    iris_classifier, _ = get_iris_classifier_kr()

    carlini = CarliniLInfMethod(iris_classifier, targeted=False, max_iter=10, eps=0.5)
    x_adv = carlini.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_k_unbounded(self):
    """
    Run DeepFool (5 iterations, batch size 128) on an Iris classifier
    rebuilt without clip values, using self.x_test / self.y_test.

    Asserts that the adversarial samples differ from the inputs and that
    not every prediction remains correct.
    """
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    x_adv = DeepFool(unclipped, max_iter=5, batch_size=128).generate(self.x_test)
    self.assertFalse((self.x_test == x_adv).all())

    y_true = np.argmax(self.y_test, axis=1)
    y_adv = np.argmax(unclipped.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    correct_rate = np.sum(y_adv == y_true) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (correct_rate * 100))
def test_iris_k_clipped(self):
    """
    Run DeepFool (5 iterations) on the Iris classifier, using
    self.x_test / self.y_test.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    iris_classifier, _ = get_iris_classifier_kr()

    x_adv = DeepFool(iris_classifier, max_iter=5).generate(self.x_test)
    self.assertFalse((self.x_test == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    y_true = np.argmax(self.y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    correct_rate = np.sum(y_adv == y_true) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (correct_rate * 100))
def test_failure_feature_vectors(self):
    """
    AdversarialPatch.generate must raise ValueError containing
    'Feature vectors detected.' when given a 10x4 random array.
    """
    iris_classifier, _ = get_iris_classifier_kr()
    patch_attack = AdversarialPatch(classifier=iris_classifier)
    patch_attack.set_params(
        rotation_max=22.5,
        scale_min=0.1,
        scale_max=1.0,
        learning_rate=5.0,
        number_of_steps=5,
        batch_size=10,
    )
    feature_vectors = np.random.rand(10, 4)

    # Assert that value error is raised for feature vectors
    with self.assertRaises(ValueError) as raised:
        patch_attack.generate(feature_vectors)

    self.assertIn('Feature vectors detected.', str(raised.exception))
def test_iris_k_clipped(self):
    """
    Run SaliencyMapMethod (theta=1) on the Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    _, (x_test, y_test) = self.iris
    iris_classifier, _ = get_iris_classifier_kr()

    x_adv = SaliencyMapMethod(iris_classifier, theta=1).generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_k_unbounded(self):
    """
    Run an untargeted CarliniLInfMethod attack (eps=1, 10 iterations) on
    an Iris classifier rebuilt without clip values.

    Asserts that the adversarial samples differ from the inputs and that
    not every prediction remains correct.
    """
    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    carlini = CarliniLInfMethod(unclipped, targeted=False, max_iter=10, eps=1)
    x_adv = carlini.generate(x_test)
    self.assertFalse((x_test == x_adv).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(unclipped.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with C&W adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_k_clipped(self):
    """
    Run an untargeted ElasticNet attack (10 iterations) on the Iris
    classifier.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct. The
    logged success rate is one minus the fraction of correct predictions.
    """
    _, (x_test, y_test) = self.iris
    iris_classifier, _ = get_iris_classifier_kr()

    ead = ElasticNet(iris_classifier, targeted=False, max_iter=10)
    x_adv = ead.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    success_rate = 1. - np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('EAD success rate on Iris: %.2f%%', (success_rate * 100))
def test_iris_k_clipped(self):
    """
    Run an untargeted BoundaryAttack (10 iterations) on the Iris
    classifier, using self.x_test / self.y_test.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    iris_classifier, _ = get_iris_classifier_kr()

    boundary = BoundaryAttack(iris_classifier, targeted=False, max_iter=10)
    x_adv = boundary.generate(self.x_test)
    self.assertFalse((self.x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(self.y_test, axis=1)
    y_adv = np.argmax(iris_classifier.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    correct_rate = np.sum(y_adv == y_true) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with boundary adversarial examples: %.2f%%', (correct_rate * 100))
def test_iris_k_unbounded(self):
    """
    Run an untargeted ElasticNet attack (10 iterations) on an Iris
    classifier rebuilt without clip values.

    Asserts that the adversarial samples differ from the inputs and that
    not every prediction remains correct. The logged success rate is one
    minus the fraction of correct predictions.
    """
    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    ead = ElasticNet(unclipped, targeted=False, max_iter=10)
    x_adv = ead.generate(x_test)
    self.assertFalse((x_test == x_adv).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(unclipped.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    success_rate = 1. - np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('EAD success rate on Iris: %.2f%%', (success_rate * 100))
def test_iris_unbounded(self):
    """
    Run FGM (eps=1) through a QueryEfficientBBGradientEstimation wrapper
    around an Iris classifier rebuilt without clip values.

    Asserts that the adversarial samples differ from the inputs, leave the
    [0, 1] range on both sides, and that not every prediction remains
    correct.
    """
    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)
    estimator = QueryEfficientBBGradientEstimation(unclipped, 20, 1 / 64., round_samples=1 / 255.)

    x_adv = FastGradientMethod(estimator, eps=1).generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv > 1).any())
    self.assertTrue((x_adv < 0).any())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(estimator.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
def test_failure_feature_vectors(self):
    """
    SpatialTransformation.generate must raise ValueError containing
    'Feature vectors detected.' when given a 10x4 random array.
    """
    iris_classifier, _ = get_iris_classifier_kr()
    spatial_attack = SpatialTransformation(classifier=iris_classifier)
    spatial_attack.set_params(
        max_translation=10.0,
        num_translations=3,
        max_rotation=30.0,
        num_rotations=3,
    )
    feature_vectors = np.random.rand(10, 4)

    # Assert that value error is raised for feature vectors
    with self.assertRaises(ValueError) as raised:
        spatial_attack.generate(feature_vectors)

    self.assertIn('Feature vectors detected.', str(raised.exception))
def test_iris_clipped(self):
    """
    Run untargeted FGM (eps=0.1) through a
    QueryEfficientBBGradientEstimation wrapper around the Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1], and that not every prediction remains correct.
    """
    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()
    estimator = QueryEfficientBBGradientEstimation(base_classifier, 20, 1 / 64., round_samples=1 / 255.)

    # Test untargeted attack
    x_adv = FastGradientMethod(estimator, eps=.1).generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(estimator.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
def test_iris_k_unbounded(self):
    """
    Run untargeted HopSkipJump, with the default norm and with
    norm=np.inf, on an Iris classifier rebuilt without clip values.

    For each attack the adversarial samples must differ from the inputs
    and not every prediction may remain correct. The session is cleared
    via k.clear_session() even when an assertion fails.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    labels = np.argmax(y_test, axis=1)

    def run_and_check(attack):
        # Generate adversarial samples and apply the shared assertions.
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((labels == preds_adv).all())
        acc = np.sum(preds_adv == labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

    try:
        # Norm=2
        run_and_check(HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10))

        # Norm=np.inf (the np.Inf alias was removed in NumPy 2.0)
        run_and_check(HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10,
                                  norm=np.inf))
    finally:
        # Clean-up session
        k.clear_session()
def test_iris_k_unbounded(self):
    """
    Run ProjectedGradientDescent (eps=1, eps_step=0.2) on an Iris
    classifier rebuilt without clip values.

    Asserts that the adversarial samples differ from the inputs, leave the
    [0, 1] range on both sides, and that not every prediction remains
    correct.
    """
    _, (x_test, y_test) = self.iris
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    pgd = ProjectedGradientDescent(unclipped, eps=1, eps_step=0.2)
    x_adv = pgd.generate(x_test)
    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv > 1).any())
    self.assertTrue((x_adv < 0).any())

    y_true = np.argmax(y_test, axis=1)
    y_adv = np.argmax(unclipped.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    accuracy = np.sum(y_adv == y_true) / y_test.shape[0]
    logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_k_clipped(self):
    """
    Run untargeted HopSkipJump, with the default norm and with
    norm=np.inf, on the Iris classifier.

    For each attack the adversarial samples must differ from the inputs,
    stay within [0, 1], and not every prediction may remain correct. The
    session is cleared via k.clear_session() even when an assertion fails.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()
    labels = np.argmax(y_test, axis=1)

    def run_and_check(attack):
        # Generate adversarial samples and apply the shared assertions.
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((labels == preds_adv).all())
        acc = np.sum(preds_adv == labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100))

    try:
        # Norm=2
        run_and_check(HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10))

        # Norm=np.inf (the np.Inf alias was removed in NumPy 2.0)
        run_and_check(HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10,
                                  norm=np.inf))
    finally:
        # Clean-up session
        k.clear_session()
def test_iris_k_unbounded(self):
    """
    Run FastGradientMethod (eps=1) on an Iris classifier rebuilt without
    clip values, using self.x_test / self.y_test.

    Asserts that the adversarial samples differ from the inputs, leave the
    [0, 1] range on both sides, and that not every prediction remains
    correct.
    """
    base_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

    x_adv = FastGradientMethod(unclipped, eps=1).generate(self.x_test)
    self.assertFalse((self.x_test == x_adv).all())
    self.assertTrue((x_adv > 1).any())
    self.assertTrue((x_adv < 0).any())

    y_true = np.argmax(self.y_test, axis=1)
    y_adv = np.argmax(unclipped.predict(x_adv), axis=1)
    self.assertFalse((y_true == y_adv).all())

    correct_rate = np.sum(y_adv == y_true) / self.y_test.shape[0]
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (correct_rate * 100))