def test_check_params_LInf(self):
    """Check that CarliniLInfMethod rejects invalid constructor arguments.

    Every checked hyper-parameter is tried first with a string value and then
    with a negative number; each construction must raise ``ValueError``.
    """
    classifier = get_image_classifier_pt(from_logits=True)

    checked_params = (
        "max_iter",
        "decrease_factor",
        "initial_const",
        "largest_const",
        "const_factor",
    )
    invalid_values = ("1.0", -1)

    for param_name in checked_params:
        for invalid in invalid_values:
            with self.assertRaises(ValueError):
                _ = CarliniLInfMethod(classifier, **{param_name: invalid})
def _cw(model, data, labels, attack_args):
    """
    Generate adversarial examples with the Carlini & Wagner attack.

    Reference: "Towards Evaluating the Robustness of Neural Networks"
    by Nicholas Carlini, David Wagner
    ``https://arxiv.org/abs/1608.04644``

    :param model: classifier handed to the attack as ``classifier``.
    :param data: input samples forwarded to ``attacker.generate``.
    :param labels: labels forwarded to ``attacker.generate``.
    :param attack_args: dict of attack settings. ``norm`` is required and
        must be ``'l2'`` or ``'linf'`` (case-insensitive). ``lr`` is
        forwarded as ``learning_rate`` without a default here. Optional keys
        and their defaults: ``max_iter`` (100), ``confidence`` (0.0),
        ``targeted`` (False), ``max_halving`` (5), ``max_doubling`` (5);
        for ``l2`` only: ``init_const`` (0.01) and ``binary_search_steps``
        (10); for ``linf`` only: ``eps`` (0.3).
    :return: the result of ``attacker.generate(data, labels)``.
    :raises ValueError: if ``norm`` is missing, is not a string, or names an
        unsupported norm.
    """
    norm = attack_args.get('norm')
    # A missing or non-string norm used to surface as an AttributeError on
    # `.lower()`; report it through the same error as any unsupported norm.
    if not isinstance(norm, str) or norm.lower() not in ('l2', 'linf'):
        raise ValueError(
            'Support `l2` and `linf` norms. But found {}'.format(norm))
    norm = norm.lower()

    lr = attack_args.get('lr')
    max_iter = attack_args.get('max_iter', 100)

    # use default values for the following arguments
    confidence = attack_args.get('confidence', 0.0)
    targeted = attack_args.get('targeted', False)
    max_halving = attack_args.get('max_halving', 5)
    max_doubling = attack_args.get('max_doubling', 5)

    if norm == 'l2':
        print('>>> Generating CW_l2 examples.')
        # Only the L2 variant is given an initial constant and a
        # binary-search budget.
        init_const = attack_args.get('init_const', 0.01)
        binary_search_steps = attack_args.get('binary_search_steps', 10)

        attacker = CarliniL2Method(classifier=model,
                                   confidence=confidence,
                                   targeted=targeted,
                                   learning_rate=lr,
                                   binary_search_steps=binary_search_steps,
                                   max_iter=max_iter,
                                   initial_const=init_const,
                                   max_halving=max_halving,
                                   max_doubling=max_doubling)
    else:
        print('>>> Generating CW_linf examples.')
        eps = attack_args.get('eps', 0.3)

        attacker = CarliniLInfMethod(classifier=model,
                                     confidence=confidence,
                                     targeted=targeted,
                                     learning_rate=lr,
                                     max_iter=max_iter,
                                     max_halving=max_halving,
                                     max_doubling=max_doubling,
                                     eps=eps)

    return attacker.generate(data, labels)
def test_pytorch_iris_LInf(self):
    """Run an untargeted C&W L_inf attack against the PyTorch Iris classifier.

    Asserts that the adversarial samples differ from the clean ones, lie in
    [0, 1], and that not every prediction still matches the true label.
    """
    classifier = get_tabular_classifier_pt()
    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)

    x_input = self.x_test_iris.astype(np.float32)
    x_test_adv = attack.generate(x_input)

    unchanged = self.x_test_iris == x_test_adv
    self.assertFalse(unchanged.all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == predictions_adv).all())

    n_correct = np.sum(predictions_adv == true_labels)
    accuracy = n_correct / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_iris_unbounded_LInf(self):
    """Run an untargeted C&W L_inf attack on a Keras Iris classifier built
    without clip values.

    Asserts that the adversarial samples differ from the clean ones and that
    not every prediction still matches the true label.
    """
    base_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(
        model=base_classifier._model,
        use_logits=False,
        channels_first=True,
    )

    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=1)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == predictions_adv).all())

    n_correct = np.sum(predictions_adv == true_labels)
    accuracy = n_correct / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
    """Craft adversarial examples for a scikit-learn model with a fixed
    configuration of the requested attack.

    :param X: input samples passed to ``attacker.generate``.
    :param Y: labels passed to ``attacker.generate`` as ``y``.
    :param model: model wrapped in a ``SklearnClassifier``; must not be None.
    :param nb_classes: used as the upper bound of the wrapper's
        ``clip_values``.
    :param attack: member of ``ATTACK`` selecting the attack; must not be
        None.
    :return: the adversarial examples returned by ``attacker.generate``.
    :raises NotImplementedError: if ``attack`` is not one of the supported
        ``ATTACK`` members.
    """
    assert model is not None
    assert attack is not None

    # NOTE(review): the clip range is bounded by the number of classes rather
    # than by a feature range -- confirm this is intended.
    art_classifier = SklearnClassifier(model=model,
                                       clip_values=(0, nb_classes))

    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=0.2,
                                            eps_step=0.1,
                                            max_iter=3,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=128)
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier,
                            max_iter=5,
                            epsilon=1e-6,
                            nb_grads=3,
                            batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      targeted=False,
                                      batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        eps=0.3,
                                        eps_step=0.1,
                                        targeted=False,
                                        batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=0.3,
                                     gamma=0.5,
                                     batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=0.01)
    else:
        # Format the message explicitly: passing two positional arguments to
        # the exception renders it as a tuple repr instead of a sentence.
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples, it will take a while...'.format(
            attack))
    return attacker.generate(X, y=Y)
def test_tensorflow_failure_attack_LInf(self):
    """
    Test the corner case when attack is failed.

    The attack is configured with ``max_iter=0`` and ``learning_rate=0``; the
    output must stay inside [0, 1] and match the input within ``atol=1e-3``.

    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    # Close the session even when an assertion fails, so a failing test does
    # not leak it.
    try:
        # Failure attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=0, learning_rate=0, eps=0.5)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)

        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        self.assertTrue(np.allclose(self.x_test_mnist, x_test_adv, atol=1e-3))
    finally:
        # Clean-up session
        if sess is not None:
            sess.close()
def test_tensorflow_iris_LInf(self):
    """Run untargeted and targeted C&W L_inf attacks on the TensorFlow Iris
    classifier.

    Both phases assert that the adversarial samples differ from the clean
    ones and lie in [0, 1]. The untargeted phase requires at least one
    prediction to differ from the true label; the targeted phase requires at
    least one prediction to hit its target.
    """
    classifier, _ = get_tabular_classifier_tf()

    def attack_and_predict(targeted, generate_kwargs):
        # Shared steps: craft, check samples changed and stay in [0, 1],
        # then return the predicted classes for the adversarial samples.
        attack = CarliniLInfMethod(classifier, targeted=targeted, max_iter=10, eps=0.5)
        x_test_adv = attack.generate(self.x_test_iris, **generate_kwargs)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        return np.argmax(classifier.predict(x_test_adv), axis=1)

    # Test untargeted attack
    predictions_adv = attack_and_predict(False, {})
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == predictions_adv).all())
    accuracy = np.sum(predictions_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    predictions_adv = attack_and_predict(True, {"y": targets})
    target_labels = np.argmax(targets, axis=1)
    self.assertTrue((target_labels == predictions_adv).any())
    accuracy = np.sum(predictions_adv == target_labels) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted C&W on Iris: %.2f%%", (accuracy * 100))
def test_pytorch_mnist_LInf(self):
    """
    Third test with the PyTorchClassifier.

    Runs a targeted and then an untargeted C&W L_inf attack on MNIST inputs
    reshaped to (N, 1, 28, 28) float32 and checks value bounds and
    predictions.

    :return:
    """
    n_samples = self.x_test_mnist.shape[0]
    x_test = np.reshape(self.x_test_mnist, (n_samples, 1, 28, 28)).astype(np.float32)

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # First attack
    targeted_attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    y_target = random_targets(self.y_test_mnist, ptc.nb_classes)
    x_test_adv = targeted_attack.generate(x_test, y=y_target)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
    target = np.argmax(y_target, axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Second attack
    untargeted_attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    x_test_adv = untargeted_attack.generate(x_test)
    self.assertLessEqual(np.amax(x_test_adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
    # The untargeted predictions are compared against the random targets of
    # the first attack, not against the true labels.
    target = np.argmax(y_target, axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target != y_pred_adv).any())
def test_scikitlearn_LInf(self):
    """Run untargeted and targeted C&W L_inf attacks on several scikit-learn
    models fitted on the Iris test data.

    For each model the adversarial samples must differ from the clean ones
    and lie in [0, 1]; the untargeted attack must change at least one
    prediction and the targeted attack must hit at least one target. The
    Iris test data itself must remain unmodified.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.estimators.classification.scikitlearn import SklearnClassifier

    scikitlearn_test_cases = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    x_test_original = self.x_test_iris.copy()

    for model in scikitlearn_test_cases:
        classifier = SklearnClassifier(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        # Test untargeted attack
        untargeted = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
        x_test_adv = untargeted.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        true_labels = np.argmax(self.y_test_iris, axis=1)
        self.assertFalse((true_labels == predictions_adv).all())
        accuracy = np.sum(predictions_adv == true_labels) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of " + classifier.__class__.__name__ + " on Iris with C&W adversarial examples: " "%.2f%%",
            (accuracy * 100),
        )

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        targeted = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
        x_test_adv = targeted.generate(self.x_test_iris, y=targets)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == predictions_adv).any())
        accuracy = np.sum(predictions_adv == target_labels) / self.y_test_iris.shape[0]
        logger.info(
            "Success rate of " + classifier.__class__.__name__ + " on targeted C&W on Iris: %.2f%%",
            (accuracy * 100),
        )

        # Check that x_test has not been modified by attack and classifier
        max_deviation = float(np.max(np.abs(x_test_original - self.x_test_iris)))
        self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
def test_tensorflow_mnist_LInf(self):
    """
    First test with the TensorFlowClassifier.

    Runs a targeted and then an untargeted C&W L_inf attack on MNIST and
    checks value bounds and predictions.

    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    # Close the session even when an assertion fails, so a failing test does
    # not leak it.
    try:
        # First attack
        clinfm = CarliniLInfMethod(
            classifier=tfc, targeted=True, max_iter=10, initial_const=1, largest_const=1.1, verbose=False
        )
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack, no batching
        clinfm = CarliniLInfMethod(
            classifier=tfc, targeted=False, max_iter=10, initial_const=1, largest_const=1.1, verbose=False
        )
        x_test_adv = clinfm.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        # The untargeted predictions are compared against the random targets
        # of the first attack, not against the true labels.
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up session
        if sess is not None:
            sess.close()
def test_keras_mnist_LInf(self):
    """
    Second test with the KerasClassifier.

    Runs a targeted and then an untargeted C&W L_inf attack on MNIST and
    checks value bounds and predictions.

    :return:
    """
    # Build KerasClassifier
    krc = get_image_classifier_kr(from_logits=True)

    # Run the clean-up even when an assertion fails, so state from a failing
    # test is not carried into later tests.
    try:
        # First attack
        clinfm = CarliniLInfMethod(classifier=krc, targeted=True, max_iter=10, eps=0.5)
        params = {"y": random_targets(self.y_test_mnist, krc.nb_classes)}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.000001)
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        clinfm = CarliniLInfMethod(classifier=krc, targeted=False, max_iter=10, eps=0.5)
        x_test_adv = clinfm.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.000001)
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        # The untargeted predictions are compared against the random targets
        # of the first attack, not against the true labels.
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up
        k.clear_session()
def craft(X, Y, art_classifier, attack=None, **attack_params):
    """Craft adversarial examples with the requested attack.

    :param X: input samples passed to ``attacker.generate``.
    :param Y: labels passed to ``attacker.generate`` as ``y``.
    :param art_classifier: classifier handed to the attack; must not be None.
    :param attack: member of ``ATTACK`` selecting the attack; must not be
        None.
    :param attack_params: optional per-attack overrides (defaults in
        parentheses):

        * PGD: ``eps`` (0.2), ``eps_step`` (``eps / 5``), ``max_iter`` (3),
          ``targeted`` (False), ``batch_size`` (128)
        * DEEPFOOL: ``eps`` (1e-6), ``max_iter`` (5), ``nb_grads`` (3),
          ``batch_size`` (1)
        * FGSM: ``eps`` (0.3), ``targeted`` (False), ``batch_size`` (128)
        * BIM: ``eps`` (0.3), ``eps_step`` (``eps / 5``), ``norm``
          (``np.inf``), ``targeted`` (False), ``batch_size`` (128)
        * JSMA: ``theta`` (0.3), ``gamma`` (0.5), ``batch_size`` (128)
        * CW_L2: ``lr`` (0.1), ``bsearch_steps`` (10)
        * CW_Linf: ``lr`` (0.01)
    :return: the adversarial examples returned by ``attacker.generate``.
    :raises NotImplementedError: if ``attack`` is not one of the supported
        ``ATTACK`` members.
    """
    assert art_classifier is not None
    assert attack is not None

    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=eps,
                                            eps_step=eps_step,
                                            max_iter=max_iter,
                                            targeted=targeted,
                                            num_random_init=0,
                                            batch_size=batch_size)
    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)

        attacker = DeepFool(classifier=art_classifier,
                            max_iter=max_iter,
                            epsilon=eps,
                            nb_grads=nb_grads,
                            batch_size=batch_size)
    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=eps,
                                      targeted=targeted,
                                      batch_size=batch_size)
    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        # NOTE(review): confirm that the installed BasicIterativeMethod
        # accepts a `norm` argument.
        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        norm=norm,
                                        eps=eps,
                                        eps_step=eps_step,
                                        targeted=targeted,
                                        batch_size=batch_size)
    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)

        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=theta,
                                     gamma=gamma,
                                     batch_size=batch_size)
    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)

        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=lr,
                                   binary_search_steps=bsearch_steps)
    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)

        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=lr)
    else:
        # Format the message explicitly: passing two positional arguments to
        # the exception renders it as a tuple repr instead of a sentence.
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples, it will take a while...'.format(
            attack))
    return attacker.generate(X, y=Y)