def _test_mnist_targeted(self, classifier, x_test):
    """Run a targeted BIM attack on MNIST and check its success rate."""
    x_test_backup = x_test.copy()

    # Targeted BIM under the L-inf norm.
    attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.01, targeted=True, batch_size=128, verbose=False)

    # Aim every sample at its runner-up class: one-hot targets built from the
    # second-highest scoring label of the clean prediction.
    ranked = classifier.predict(x_test).argsort(axis=1)
    y_target = np.zeros((x_test.shape[0], 10))
    for row in range(x_test.shape[0]):
        y_target[row, ranked[row, -2]] = 1.0

    x_adv = attack.generate(x_test, y=y_target)

    self.assertFalse((x_test == x_adv).all())

    adv_pred = get_labels_np_array(classifier.predict(x_adv))
    self.assertEqual(y_target.shape, adv_pred.shape)
    # This doesn't work all the time, especially with small networks
    self.assertGreaterEqual((y_target == adv_pred).sum(), x_test.shape[0] // 2)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_backup - x_test))), 0.0, delta=0.00001)
def test_9a_keras_iris_unbounded(self):
    """BIM on a Keras Iris classifier without clip values should push samples outside [0, 1]."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)

    attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.2, batch_size=128)
    x_adv = attack.generate(self.x_test_iris)

    # Perturbed, and unbounded in both directions.
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv > 1).any())
    self.assertTrue((x_adv < 0).any())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())

    acc = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
    """Craft adversarial examples against a fitted scikit-learn model.

    :param X: clean input samples.
    :param Y: labels forwarded to the attack's ``generate``.
    :param model: fitted scikit-learn estimator to wrap in an ART classifier.
    :param nb_classes: number of classes; also used as the upper clip value.
    :param attack: ``ATTACK`` enum member selecting the algorithm.
    :return: adversarial counterparts of ``X``.
    :raises NotImplementedError: if ``attack`` is not a supported member.
    """
    assert model is not None
    assert attack is not None

    # NOTE(review): clip_values=(0, nb_classes) assumes features live in
    # [0, nb_classes] — confirm this matches the data's actual range.
    art_classifier = SklearnClassifier(model=model, clip_values=(0, nb_classes))

    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(classifier=art_classifier, norm=np.inf, eps=0.2,
                                            eps_step=0.1, max_iter=3, targeted=False,
                                            num_random_init=0, batch_size=128)
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier, max_iter=5, epsilon=1e-6,
                            nb_grads=3, batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier, norm=np.inf, eps=0.3,
                                      targeted=False, batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier, eps=0.3, eps_step=0.1,
                                        targeted=False, batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier, theta=0.3, gamma=0.5,
                                     batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier, learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier, learning_rate=0.01)
    else:
        # Bug fix: the original raised NotImplementedError(attack, '...') with two
        # positional args, which renders as a tuple; format one readable message.
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples, it will take a while...'.format(
            attack))
    X_adv = attacker.generate(X, y=Y)
    del attacker

    return X_adv
def test_5_pytorch_iris(self):
    """Untargeted and targeted BIM against the PyTorch Iris classifier."""
    classifier = get_tabular_classifier_pt()

    # --- Untargeted attack ---
    attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.1)
    x_adv = attack.generate(self.x_test_iris)

    self.assertFalse((self.x_test_iris == x_adv).all())
    # Clip values keep samples inside the unit box.
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())
    acc = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))

    # --- Targeted attack ---
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = BasicIterativeMethod(classifier, targeted=True, eps=1.0, eps_step=0.1, batch_size=128)
    x_adv = attack.generate(self.x_test_iris, y=targets)

    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    # At least some samples should reach their target class.
    self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
    acc = np.sum(adv_labels == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted BIM on Iris: %.2f%%", (acc * 100))
def _test_backend_mnist(self, classifier, x_train, y_train, x_test, y_test):
    """Untargeted L-inf BIM on MNIST: perturbation happens and accuracy drops."""
    x_test_backup = x_test.copy()

    # BIM with the default (np.inf) norm; eps kept as int to match eps_step handling.
    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, batch_size=128)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((y_train == train_pred).all())
    self.assertFalse((y_test == test_pred).all())

    acc = np.sum(np.argmax(train_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (acc * 100))

    acc = np.sum(np.argmax(test_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (acc * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_backup - x_test))), 0.0, delta=0.00001)
def _bim(model, data, labels, attack_args):
    """Craft adversarial examples with the Basic Iterative Method.

    Reference: "Adversarial Examples in the Physical World",
    Alexey Kurakin, Ian J. Goodfellow, Samy Bengio,
    ``https://arxiv.org/pdf/1607.02533.pdf``

    :param model: ART classifier wrapping the target model.
    :param data: clean input samples.
    :param labels: labels forwarded to the attack's ``generate``.
    :param attack_args: dict of optional hyper-parameters
        (``eps``, ``eps_step``, ``max_iter``, ``targeted``).
    :return: adversarial counterparts of ``data``.
    """
    eps = attack_args.get('eps', 0.3)
    step = attack_args.get('eps_step', eps / 10.)  # default: one tenth of eps
    iters = attack_args.get('max_iter', 100)
    is_targeted = attack_args.get('targeted', False)

    print('>>> Generating BIM examples.')
    bim = BasicIterativeMethod(classifier=model, eps=eps, eps_step=step,
                               max_iter=iters, targeted=is_targeted)
    return bim.generate(data, labels)
def test_8_scikitlearn(self):
    """BIM (untargeted and targeted) against several scikit-learn Iris models."""
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.estimators.classification.scikitlearn import SklearnClassifier

    models_under_test = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    x_test_backup = self.x_test_iris.copy()

    for model in models_under_test:
        classifier = SklearnClassifier(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        # --- Untargeted attack ---
        attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.1, max_iter=5)
        x_adv = attack.generate(self.x_test_iris)

        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())
        acc = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of " + classifier.__class__.__name__ + " on Iris with BIM adversarial examples: "
            "%.2f%%",
            (acc * 100),
        )

        # --- Targeted attack ---
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = BasicIterativeMethod(classifier, targeted=True, eps=1.0, eps_step=0.1, batch_size=128, max_iter=5)
        x_adv = attack.generate(self.x_test_iris, y=targets)

        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        # At least some samples should land on their target class.
        self.assertTrue((np.argmax(targets, axis=1) == adv_labels).any())
        acc = np.sum(adv_labels == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Success rate of " + classifier.__class__.__name__ + " on targeted BIM on Iris: %.2f%%", (acc * 100)
        )

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_backup - self.x_test_iris))), 0.0, delta=0.00001)
def _test_backend_mnist(self, classifier, x_train, y_train, x_test, y_test):
    """L-inf BIM on MNIST with scalar eps, then with eps arrays of every broadcastable rank."""
    x_test_backup = x_test.copy()

    # Scalar-eps BIM with the default np.inf norm.
    attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.1, batch_size=128, verbose=False)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((y_train == train_pred).all())
    self.assertFalse((y_test == test_pred).all())

    acc = np.sum(np.argmax(train_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (acc * 100))

    acc = np.sum(np.argmax(test_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (acc * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_backup - x_test))), 0.0, delta=0.00001)

    # Exercise eps/eps_step given as arrays of shrinking rank: the full batch
    # shape, then x_test.shape[1:], [2:], and [3:] (array types 1 through 4).
    for drop in range(4):
        eps = np.ones(shape=x_test.shape[drop:]) * 1.0
        eps_step = np.ones_like(eps) * 0.1

        attack.set_params(**{"eps_step": eps_step, "eps": eps})
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((y_test == test_pred).all())
def craft(X, Y, art_classifier, attack=None, **attack_params):
    """Craft adversarial examples with a configurable ART attack.

    :param X: clean input samples.
    :param Y: labels forwarded to the attack's ``generate``.
    :param art_classifier: ART classifier wrapping the target model.
    :param attack: ``ATTACK`` enum member selecting the algorithm.
    :param attack_params: per-attack hyper-parameter overrides; unspecified
        values fall back to the defaults below.
    :return: adversarial counterparts of ``X``.
    :raises NotImplementedError: if ``attack`` is not a supported member.
    """
    assert art_classifier is not None
    assert attack is not None

    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)
        attacker = ProjectedGradientDescent(classifier=art_classifier, norm=np.inf, eps=eps,
                                            eps_step=eps_step, max_iter=max_iter,
                                            targeted=targeted, num_random_init=0,
                                            batch_size=batch_size)
    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)
        attacker = DeepFool(classifier=art_classifier, max_iter=max_iter, epsilon=eps,
                            nb_grads=nb_grads, batch_size=batch_size)
    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)
        attacker = FastGradientMethod(classifier=art_classifier, norm=np.inf, eps=eps,
                                      targeted=targeted, batch_size=batch_size)
    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)
        attacker = BasicIterativeMethod(classifier=art_classifier, norm=norm, eps=eps,
                                        eps_step=eps_step, targeted=targeted,
                                        batch_size=batch_size)
    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)
        attacker = SaliencyMapMethod(classifier=art_classifier, theta=theta, gamma=gamma,
                                     batch_size=batch_size)
    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)
        attacker = CarliniL2Method(classifier=art_classifier, learning_rate=lr,
                                   binary_search_steps=bsearch_steps)
    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)
        attacker = CarliniLInfMethod(classifier=art_classifier, learning_rate=lr)
    else:
        # Bug fix: the original raised NotImplementedError(attack, '...') with two
        # positional args, which renders as a tuple; format one readable message.
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples, it will take a while...'.format(
            attack))
    X_adv = attacker.generate(X, y=Y)
    del attacker

    return X_adv
# NOTE(review): flat script section — relies on names defined earlier in the
# file (new_model, loss_object, train_step, eps, eps_step, train_data,
# test_data, exp, save_samples); confirm against the full script.
print("Creating classifier...\n")
# Wrap the model for ART; 5 classes, inputs shaped (1, 25) clipped to [0, 1].
adv_classifier = TensorFlowV2Classifier(
    model=new_model,
    loss_object=loss_object,
    train_step=train_step,
    nb_classes=5,
    input_shape=(1, 25),
    clip_values=(0, 1),
)
print("Creating adversarial attack object...\n")
# Untargeted BIM; eps and eps_step come from the surrounding scope.
bim = BasicIterativeMethod(adv_classifier, eps=eps, eps_step=eps_step, targeted=False, batch_size=2048)
print("Generating adversarial samples...\n")
logger.info("Craft attack on training examples")
# Craft and persist adversarial versions of the training split.
x_train_adv = bim.generate(train_data)
save_samples(x_train_adv, 'bim_train', exp)
logger.info("=" * 50)
logger.info("Craft attack test examples")
# Craft and persist adversarial versions of the test split.
x_test_adv = bim.generate(test_data)
save_samples(x_test_adv, 'bim_test', exp)
logger.info("=" * 50)
print("Evaluating adversarial samples on clean model...\n")