def test_tensorflow_failure_attack(self):
    """
    Test the corner case when attack fails.
    :return:
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    # Failure attack
    zoo = ZooAttack(classifier=tfc, max_iter=0, binary_search_steps=0, learning_rate=0)
    x_test_mnist_adv = zoo.generate(self.x_test_mnist)
    self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
    np.testing.assert_almost_equal(self.x_test_mnist, x_test_mnist_adv, 3)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

    # Clean-up session
    if sess is not None:
        sess.close()
def test_keras_mnist(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    x_test_original = self.x_test.copy()

    # Build KerasClassifier
    krc = get_classifier_kr()

    # Targeted attack
    # zoo = ZooAttack(classifier=krc, targeted=True, batch_size=5)
    # params = {'y': random_targets(self.y_test, krc.nb_classes())}
    # x_test_adv = zoo.generate(self.x_test, **params)
    #
    # self.assertFalse((self.x_test == x_test_adv).all())
    # self.assertLessEqual(np.amax(x_test_adv), 1.0)
    # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    # target = np.argmax(params['y'], axis=1)
    # y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    # logger.debug('ZOO target: %s', target)
    # logger.debug('ZOO actual: %s', y_pred_adv)
    # logger.info('ZOO success rate on MNIST: %.2f', (sum(target == y_pred_adv) / float(len(target))))

    # Untargeted attack
    # zoo = ZooAttack(classifier=krc, targeted=False, max_iter=20)
    zoo = ZooAttack(classifier=krc, targeted=False, batch_size=5)
    # x_test_adv = zoo.generate(x_test)
    params = {'y': random_targets(self.y_test, krc.nb_classes())}
    x_test_adv = zoo.generate(self.x_test, **params)

    # x_test_adv_true = [0.00000000e+00, 2.50167388e-04, 1.50529508e-04, 4.69674182e-04,
    #                    0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
    #                    1.67321396e-05, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
    #                    0.00000000e+00, 2.08451956e-06, 0.00000000e+00, 0.00000000e+00,
    #                    2.53360748e-01, 9.60119188e-01, 9.85227525e-01, 2.53600776e-01,
    #                    0.00000000e+00, 0.00000000e+00, 5.23251540e-04, 0.00000000e+00,
    #                    0.00000000e+00, 0.00000000e+00, 1.08632184e-05, 0.00000000e+00]
    #
    # for i in range(14):
    #     self.assertAlmostEqual(x_test_adv_true[i], x_test_adv[0, 14, i, 0])

    # self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    y_pred = np.argmax(krc.predict(self.x_test), axis=1)
    logger.debug('ZOO actual: %s', y_pred_adv)
    logger.info('ZOO success rate on MNIST: %.2f', (sum(y_pred != y_pred_adv) / float(len(y_pred))))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test))), 0.0, delta=0.00001)

    # Clean-up
    k.clear_session()
def test_pytorch_mnist(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Get MNIST in channels-first format for PyTorch
    x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test_mnist.copy()

    # First attack
    # zoo = ZooAttack(classifier=ptc, targeted=True, max_iter=10, binary_search_steps=10)
    # params = {'y': random_targets(self.y_test, ptc.nb_classes())}
    # x_test_adv = zoo.generate(x_test, **params)
    # self.assertFalse((x_test == x_test_adv).all())
    # self.assertLessEqual(np.amax(x_test_adv), 1.0)
    # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    # target = np.argmax(params['y'], axis=1)
    # y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    # logger.debug('ZOO target: %s', target)
    # logger.debug('ZOO actual: %s', y_pred_adv)
    # logger.info('ZOO success rate on MNIST: %.2f', (sum(target != y_pred_adv) / float(len(target))))

    # Second attack
    zoo = ZooAttack(
        classifier=ptc,
        targeted=False,
        learning_rate=1e-2,
        max_iter=10,
        binary_search_steps=3,
        abort_early=False,
        use_resize=False,
        use_importance=False,
    )
    x_test_mnist_adv = zoo.generate(x_test_mnist)
    self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)

    # print(x_test[0, 0, 14, :])
    # print(x_test_adv[0, 0, 14, :])
    # print(np.amax(x_test - x_test_adv))
    x_test_adv_expected = []

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)
def GetAttackers(classifier, x_test, attacker_name):
    """
    Load the requested attack for the given classifier and generate adversarial samples.
    Returns the adversarial samples and the elapsed time in seconds.
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier, targeted=False, epsilon=0.05, max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier, norm=2, eps=1, eps_step=0.5)
    else:
        raise ValueError("Please provide a valid attacker name.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
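# A minimal usage sketch for GetAttackers (hedged: `classifier` is assumed to be an
# already-trained ART classifier from this script and `x_test` an array of test inputs;
# neither name is defined in this snippet).
x_test_adv, dt = GetAttackers(classifier, x_test, "ZooAttack")
print("ZooAttack generated {} adversarial samples in {:.1f}s".format(len(x_test_adv), dt))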
def test_failure_attack(self):
    """
    Test the corner case when attack fails.
    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    # Failure attack
    zoo = ZooAttack(classifier=tfc, max_iter=0, binary_search_steps=0, learning_rate=0)
    x_test_adv = zoo.generate(self.x_test)
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    np.testing.assert_almost_equal(self.x_test, x_test_adv, 3)

    # Clean-up session
    sess.close()
def test_classifier_type_check_fail_classifier(self):
    # Use a useless test classifier to test basic classifier properties
    class ClassifierNoAPI:
        pass

    classifier = ClassifierNoAPI
    with self.assertRaises(TypeError) as context:
        _ = ZooAttack(classifier=classifier)

    self.assertIn('For `ZooAttack` classifier must be an instance of `art.classifiers.classifier.Classifier`, the '
                  'provided classifier is instance of (<class \'object\'>,).', str(context.exception))
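# For contrast, a minimal sketch of a call that passes the type check, assuming the same
# get_image_classifier_tf test helper used by the other tests in this module is available here.
tfc, sess = get_image_classifier_tf()
zoo = ZooAttack(classifier=tfc)  # no TypeError: tfc is an art Classifier instance
if sess is not None:
    sess.close()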
def test_tensorflow_mnist(self):
    """
    First test with the TensorFlowClassifier.
    :return:
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    # Targeted attack
    zoo = ZooAttack(classifier=tfc, targeted=True, max_iter=30, binary_search_steps=8, batch_size=128)
    params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes())}
    x_test_mnist_adv = zoo.generate(self.x_test_mnist, **params)
    self.assertFalse((self.x_test_mnist == x_test_mnist_adv).all())
    self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_mnist_adv), axis=1)
    logger.debug("ZOO target: %s", target)
    logger.debug("ZOO actual: %s", y_pred_adv)
    logger.info("ZOO success rate on MNIST: %.2f", (sum(target == y_pred_adv) / float(len(target))))

    # Untargeted attack
    zoo = ZooAttack(classifier=tfc, targeted=False, max_iter=10, binary_search_steps=3)
    x_test_mnist_adv = zoo.generate(self.x_test_mnist)
    # self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
    y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_mnist_adv), axis=1)
    logger.debug("ZOO actual: %s", y_pred_adv)
    logger.info("ZOO success rate on MNIST: %.2f", (sum(y_pred != y_pred_adv) / float(len(y_pred))))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

    # Clean-up session
    if sess is not None:
        sess.close()
def test_tfclassifier(self):
    """
    First test with the TensorFlowClassifier.
    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    # Targeted attack
    zoo = ZooAttack(classifier=tfc, targeted=True, max_iter=100, binary_search_steps=10)
    params = {'y': random_targets(self.y_test, tfc.nb_classes())}
    x_test_adv = zoo.generate(self.x_test, **params)
    self.assertFalse((self.x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    logger.debug('ZOO target: %s', target)
    logger.debug('ZOO actual: %s', y_pred_adv)
    logger.info('ZOO success rate on MNIST: %.2f', (sum(target == y_pred_adv) / float(len(target))))

    # Untargeted attack
    zoo = ZooAttack(classifier=tfc, targeted=False)
    x_test_adv = zoo.generate(self.x_test)
    # self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    y_pred = np.argmax(tfc.predict(self.x_test), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    logger.debug('ZOO actual: %s', y_pred_adv)
    logger.info('ZOO success rate on MNIST: %.2f', (sum(y_pred != y_pred_adv) / float(len(y_pred))))

    # Clean-up session
    sess.close()
# Step 5: Evaluate the ART classifier on benign test examples
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 6: Generate adversarial test examples
attack = ZooAttack(
    classifier=classifier,
    confidence=0.0,
    targeted=False,
    learning_rate=1e-1,
    max_iter=200,
    binary_search_steps=10,
    initial_const=1e-3,
    abort_early=True,
    use_resize=False,
    use_importance=False,
    nb_parallel=5,
    batch_size=1,
    variable_h=0.01,
)
x_test_adv = attack.generate(x=x_test, y=y_test)

# Step 7: Evaluate the ART classifier on adversarial test examples
predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
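# Optional sanity check (a minimal sketch using only the arrays defined in the steps above):
# report how large the ZOO perturbations are; small values indicate the attack changed the
# inputs only slightly while still degrading accuracy.
perturbation = np.abs(x_test_adv - x_test)
print("Maximum perturbation: {:.4f}".format(np.max(perturbation)))
print("Mean perturbation: {:.6f}".format(np.mean(perturbation)))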
def adversarial_attack_shift(x, y, delta=1.0, model=RandomForestClassifier(), attack_type='zoo',
                             numerical_features=None, feat_delta=1.0):
    # Here delta is the fraction of the held-out half of the data on which to generate attacks;
    # at least the first half must be kept to train the model against which the attacks are generated.
    assert attack_type in ['zoo', 'boundary', 'hop-skip-jump']

    le = preprocessing.LabelEncoder()
    le.fit(np.squeeze(y))
    y = le.transform(y)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=(0.5 * delta))

    # Select the subset of (numerical) features that the attack is allowed to perturb
    if numerical_features is not None:
        n_numerical = len(numerical_features)
        feat_indices = np.random.choice(n_numerical, ceil(n_numerical * feat_delta), replace=False)
        feat_indices = np.array(numerical_features)[feat_indices]
    else:
        feat_indices = np.random.choice(x.shape[1], ceil(x.shape[1] * feat_delta), replace=False)
    other_features = list(set(range(x.shape[1])) - set(feat_indices))

    x_train_other = x_train[:, other_features]
    x_train_numerical = x_train[:, feat_indices]
    x_test_other = x_test[:, other_features]
    x_test_numerical = x_test[:, feat_indices]

    classifier = SklearnClassifier(model=model, clip_values=(0, np.max(x_train_numerical)))

    # Train the ART classifier
    classifier.fit(x_train_numerical, y_train)

    # Evaluate the ART classifier on benign test examples
    predictions = classifier.predict(x_test_numerical)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Generate adversarial test examples
    if attack_type == 'zoo':
        attack = ZooAttack(
            classifier=classifier,
            confidence=0.0,
            targeted=False,
            learning_rate=1e-1,
            max_iter=10,
            binary_search_steps=10,
            initial_const=1e-3,
            abort_early=True,
            use_resize=False,
            use_importance=False,
            nb_parallel=x_test_numerical.shape[1],
            batch_size=1,
            variable_h=0.01,
        )
    elif attack_type == 'boundary':
        attack = BoundaryAttack(classifier, targeted=False, epsilon=0.02, max_iter=20, num_trial=10)
    elif attack_type == 'hop-skip-jump':
        attack = HopSkipJump(classifier, targeted=False, norm=2, max_iter=20, max_eval=10, init_eval=9, init_size=10)
    x_adv = attack.generate(x=x_test_numerical, y=y_test)

    # Evaluate the ART classifier on adversarial test examples
    predictions_adv = classifier.predict(x_adv)
    accuracy = np.sum(np.argmax(predictions_adv, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
    print("Max difference: {}".format(np.max(np.abs(x_test_numerical - x_adv) / x_test_numerical)))

    # Reassemble the full dataset: clean training rows followed by the adversarially perturbed test rows
    x_final = np.zeros_like(x)
    x_final[:, feat_indices] = np.vstack([x_train_numerical, x_adv])
    x_final[:, other_features] = np.vstack([x_train_other, x_test_other])
    y_final = np.concatenate([y_train, y_test], axis=0)
    y_final = le.inverse_transform(y_final)
    adv_indices = list(range(len(y_train), len(y)))

    return x_final, y_final, adv_indices, feat_indices
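# A minimal usage sketch for adversarial_attack_shift with synthetic data (hedged: the array
# shapes, labels, and numerical_features below are illustrative only, and the default
# RandomForestClassifier model is assumed to be acceptable to the wrapped ZooAttack).
x_demo = np.random.rand(200, 6)
y_demo = np.random.choice(['a', 'b'], size=200)
x_shifted, y_shifted, adv_indices, perturbed_feats = adversarial_attack_shift(
    x_demo, y_demo, delta=1.0, attack_type='zoo', numerical_features=[0, 1, 2, 3]
)
print("Adversarially shifted rows:", len(adv_indices))
print("Perturbed feature indices:", perturbed_feats)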