def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, _ = get_iris_classifier_tf() # Test untargeted attack attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info('Accuracy on Iris with BIM adversarial examples: %.2f%%', (acc * 100)) # Test targeted attack targets = random_targets(y_test, nb_classes=3) attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1) x_test_adv = attack.generate(x_test, **{'y': targets}) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any()) acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0] logger.info('Success rate of targeted BIM on Iris: %.2f%%', (acc * 100))
def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, _ = get_iris_classifier_tf() # Test untargeted attack attack = ElasticNet(classifier, targeted=False, max_iter=10) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = 1. - np.sum( preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info('EAD success rate on Iris: %.2f%%', (acc * 100)) # Test targeted attack targets = random_targets(y_test, nb_classes=3) attack = ElasticNet(classifier, targeted=True, max_iter=10) x_test_adv = attack.generate(x_test, **{'y': targets}) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any()) acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0] logger.info('Targeted EAD success rate on Iris: %.2f%%', (acc * 100))
def test_iris_tf(self): classifier, _ = get_iris_classifier_tf() # Test untargeted attack attack = BoundaryAttack(classifier, targeted=False, max_iter=10) x_test_adv = attack.generate(self.x_test) self.assertFalse((self.x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(self.y_test, axis=1) == preds_adv).all()) accuracy = np.sum( preds_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0] logger.info( 'Accuracy on Iris with boundary adversarial examples: %.2f%%', (accuracy * 100)) # Test targeted attack targets = random_targets(self.y_test, nb_classes=3) attack = BoundaryAttack(classifier, targeted=True, max_iter=10) x_test_adv = attack.generate(self.x_test, **{'y': targets}) self.assertFalse((self.x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any()) accuracy = np.sum( preds_adv == np.argmax(targets, axis=1)) / self.y_test.shape[0] logger.info('Success rate of targeted boundary on Iris: %.2f%%', (accuracy * 100))
def test_iris_tf(self): classifier, _ = get_iris_classifier_tf() # Test untargeted attack attack = FastGradientMethod(classifier, eps=.1) x_test_adv = attack.generate(self.x_test) self.assertFalse((self.x_test == x_test_adv).all()) self.assertLessEqual(np.amax(x_test_adv), 1.0) self.assertGreaterEqual(np.amin(x_test_adv), 0.0) predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(self.y_test, axis=1) == predictions_adv).all()) accuracy = np.sum(predictions_adv == np.argmax( self.y_test, axis=1)) / self.y_test.shape[0] logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (accuracy * 100)) # Test targeted attack targets = random_targets(self.y_test, nb_classes=3) attack = FastGradientMethod(classifier, targeted=True, eps=.1, batch_size=128) x_test_adv = attack.generate(self.x_test, **{'y': targets}) self.assertFalse((self.x_test == x_test_adv).all()) self.assertLessEqual(np.amax(x_test_adv), 1.0) self.assertGreaterEqual(np.amin(x_test_adv), 0.0) predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertTrue((np.argmax(targets, axis=1) == predictions_adv).any()) accuracy = np.sum(predictions_adv == np.argmax( targets, axis=1)) / self.y_test.shape[0] logger.info('Success rate of targeted FGM on Iris: %.2f%%', (accuracy * 100))
def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, _ = get_iris_classifier_tf() # Test untargeted attack attack_params = { "max_iter": 1, "attacker": "ead", "attacker_params": { "max_iter": 5, "targeted": False } } attack = UniversalPerturbation(classifier) attack.set_params(**attack_params) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info( 'Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))
def test_iris_tf(self): classifier, _ = get_iris_classifier_tf() attack = DeepFool(classifier, max_iter=5, batch_size=128) x_test_adv = attack.generate(self.x_test) self.assertFalse((self.x_test == x_test_adv).all()) self.assertLessEqual(np.amax(x_test_adv), 1.0) self.assertGreaterEqual(np.amin(x_test_adv), 0.0) predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(self.y_test, axis=1) == predictions_adv).all()) accuracy = np.sum(predictions_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0] logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, _ = get_iris_classifier_tf() attack = VirtualAdversarialMethod(classifier, eps=.1) with self.assertRaises(TypeError) as context: x_test_adv = attack.generate(x_test) self.assertIn( 'This attack requires a classifier predicting probabilities in the range [0, 1] as output.' 'Values smaller than 0.0 or larger than 1.0 have been detected.', str(context.exception))
def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, _ = get_iris_classifier_tf() attack = SaliencyMapMethod(classifier, theta=1) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%', (acc * 100))
def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, _ = get_iris_classifier_tf() attack = NewtonFool(classifier, max_iter=5, batch_size=128) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info('Accuracy on Iris with NewtonFool adversarial examples: %.2f%%', (acc * 100))
def test_iris_tf(self): (_, _), (x_test, y_test) = self.iris classifier, sess = get_iris_classifier_tf() # Test untargeted attack and norm=2 attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info( 'Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100)) # Test untargeted attack and norm=np.inf attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.Inf) x_test_adv = attack.generate(x_test) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all()) acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0] logger.info( 'Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%', (acc * 100)) # Test targeted attack and norm=2 targets = random_targets(y_test, nb_classes=3) attack = HopSkipJump(classifier, targeted=True, max_iter=2, max_eval=100, init_eval=10) x_test_adv = attack.generate(x_test, **{'y': targets}) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any()) acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0] logger.info('Success rate of targeted HopSkipJump on Iris: %.2f%%', (acc * 100)) # Test targeted attack and norm=np.inf targets = random_targets(y_test, nb_classes=3) attack = HopSkipJump(classifier, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.Inf) x_test_adv = attack.generate(x_test, **{'y': targets}) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1).all()) self.assertTrue((x_test_adv >= 0).all()) preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1) self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any()) acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0] logger.info('Success rate of targeted HopSkipJump on Iris: %.2f%%', (acc * 100)) # Clean-up session sess.close()