def test_3_tensorflow_classifier(self):
    """
    Extract a TensorFlow classifier with KnockoffNets using the "random" and "adaptive" strategies.

    After each extraction the thieved classifier must agree with the victim's predicted labels on
    ``self.x_train_mnist`` more often than a fixed threshold (0.3 for random, 0.4 for adaptive).

    :return: None.
    """
    # Build TensorFlowClassifier
    victim_tfc, sess = get_image_classifier_tf()

    try:
        # Create the thieved classifier (shares the victim's session)
        thieved_tfc, _ = get_image_classifier_tf(load_init=False, sess=sess)

        # Create random attack
        attack = KnockoffNets(
            classifier=victim_tfc,
            batch_size_fit=BATCH_SIZE,
            batch_size_query=BATCH_SIZE,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
            sampling_strategy="random",
            verbose=False,
        )
        thieved_tfc = attack.extract(x=self.x_train_mnist, thieved_classifier=thieved_tfc)

        victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_mnist), axis=1)
        thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_mnist), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)

        # Create adaptive attack (needs labels, and continues from the already extracted classifier)
        attack = KnockoffNets(
            classifier=victim_tfc,
            batch_size_fit=BATCH_SIZE,
            batch_size_query=BATCH_SIZE,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
            sampling_strategy="adaptive",
            reward="all",
            verbose=False,
        )
        thieved_tfc = attack.extract(x=self.x_train_mnist, y=self.y_train_mnist, thieved_classifier=thieved_tfc)

        victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_mnist), axis=1)
        thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_mnist), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.4)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def setUpClass(cls):
    """
    Prepare the data and classifiers shared by the tests of this class.

    Loads MNIST, keeps the first ``NB_TRAIN`` training and ``NB_TEST`` test samples in ``cls.mnist`` and
    stores two classifiers returned by ``get_image_classifier_tf`` in ``cls.classifier`` and
    ``cls.classifier_2``. The sessions returned alongside the classifiers are discarded.
    """
    # MNIST
    (train_images, train_labels), (test_images, test_labels), _, _ = load_mnist()

    train_split = (train_images[:NB_TRAIN], train_labels[:NB_TRAIN])
    test_split = (test_images[:NB_TEST], test_labels[:NB_TEST])
    cls.mnist = (train_split, test_split)

    first_classifier, _ = get_image_classifier_tf()
    cls.classifier = first_classifier

    second_classifier, _ = get_image_classifier_tf()
    cls.classifier_2 = second_classifier
def test_2_tensorflow_mnist(self):
    """
    Run a targeted universal perturbation (FGSM attacker, target class 0) on a TensorFlow classifier.

    Checks that the attack reaches a fooling rate of at least 0.2 or reports non-convergence, that adding
    the learned noise changes the test images, that predictions on the adversarial train/test data do not
    all match the true labels, and that ``self.x_test_mnist`` is left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # One-hot targets: every sample is pushed towards the same class
        target = 0
        y_target = np.zeros([len(self.x_train_mnist), 10])
        y_target[:, target] = 1.0

        # Attack
        up = TargetedUniversalPerturbation(
            tfc, max_iter=1, attacker="fgsm", attacker_params={"eps": 0.3, "targeted": True}
        )
        x_train_adv = up.generate(self.x_train_mnist, y=y_target)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = self.x_test_mnist + up.noise
        self.assertFalse((self.x_test_mnist == x_test_adv).all())

        train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_mnist, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(self.y_train_mnist, axis=1) == train_y_pred).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session (was never closed before)
        if sess is not None:
            sess.close()
def test_tensorflow_classifier(self):
    """
    Run the SpatialTransformation attack on a TensorFlow classifier and compare with reference values.

    Verifies one reference pixel of the adversarial train and test images, the reported fooling rate, the
    selected translation/rotation parameters, and that ``self.x_test_mnist`` is left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # Attack
        attack_st = SpatialTransformation(
            tfc, max_translation=10.0, num_translations=3, max_rotation=30.0, num_rotations=3
        )
        x_train_adv = attack_st.generate(self.x_train_mnist)

        self.assertAlmostEqual(x_train_adv[0, 8, 13, 0], 0.49004024, delta=0.01)
        self.assertAlmostEqual(attack_st.fooling_rate, 0.71, delta=0.02)

        self.assertEqual(attack_st.attack_trans_x, 3)
        self.assertEqual(attack_st.attack_trans_y, 3)
        self.assertEqual(attack_st.attack_rot, 30.0)

        x_test_adv = attack_st.generate(self.x_test_mnist)

        self.assertAlmostEqual(x_test_adv[0, 14, 14, 0], 0.013572651, delta=0.01)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_3_tensorflow_mnist(self):
    """
    Run the NewtonFool attack on a TensorFlow classifier.

    Checks that the adversarial test images differ from the originals, that for each sample the original
    top score is at least 0.9 times the adversarial score of that same class, and that
    ``self.x_test_mnist`` is left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # Attack
        nf = NewtonFool(tfc, max_iter=5, batch_size=100)
        x_test_adv = nf.generate(self.x_test_mnist)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())

        y_pred = tfc.predict(self.x_test_mnist)
        y_pred_adv = tfc.predict(x_test_adv)

        # Boolean mask selecting, per row, the position(s) of the originally highest score
        y_pred_bool = y_pred.max(axis=1, keepdims=1) == y_pred
        y_pred_max = y_pred.max(axis=1)
        y_pred_adv_max = y_pred_adv[y_pred_bool]
        self.assertTrue((y_pred_max >= 0.9 * y_pred_adv_max).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session (was never closed before)
        if sess is not None:
            sess.close()
def test_tensorflow_mnist(self):
    """
    Run the UniversalPerturbation attack (NewtonFool attacker) on a TensorFlow classifier.

    Checks that the attack reaches a fooling rate of at least 0.2 or reports non-convergence, that adding
    the learned noise changes the test images, that predictions on the adversarial train/test data do not
    all match the true labels, and that ``self.x_test_mnist`` is left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # Attack
        up = UniversalPerturbation(tfc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
        x_train_adv = up.generate(self.x_train_mnist)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = self.x_test_mnist + up.noise
        self.assertFalse((self.x_test_mnist == x_test_adv).all())

        train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_mnist, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(self.y_train_mnist, axis=1) == train_y_pred).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session (was never closed before)
        if sess is not None:
            sess.close()
def test_tensorflow_failure_attack(self):
    """
    Test the corner case when the ZOO attack fails.

    The attack is configured with zero iterations, zero binary search steps and zero learning rate; the
    output must stay within [0, 1] and match the input to 3 decimals, and ``self.x_test_mnist`` must be
    left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # Failure attack
        zoo = ZooAttack(classifier=tfc, max_iter=0, binary_search_steps=0, learning_rate=0)
        x_test_mnist_adv = zoo.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
        np.testing.assert_almost_equal(self.x_test_mnist, x_test_mnist_adv, 3)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_classifier(self):
    """
    Extract a TensorFlow classifier with CopycatCNN into a small hand-built network.

    The thieved network (conv -> max-pool -> flatten -> dense logits, trained with Adam) is created in
    the victim's session. After extraction it must agree with the victim's predicted labels on the first
    100 training samples in more than 30% of the cases.

    :return: None.
    """
    # Build TensorFlowClassifiers
    victim_tfc, sess = get_image_classifier_tf()

    try:
        # Define input and output placeholders
        input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        output_ph = tf.placeholder(tf.int32, shape=[None, 10])

        # Define the tensorflow graph
        conv = tf.layers.conv2d(input_ph, 1, 7, activation=tf.nn.relu)
        conv = tf.layers.max_pooling2d(conv, 4, 4)
        flattened = tf.layers.flatten(conv)

        # Logits layer
        logits = tf.layers.dense(flattened, 10)

        # Train operator
        loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=output_ph))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train = optimizer.minimize(loss)

        # TensorFlow session and initialization
        sess.run(tf.global_variables_initializer())

        # Create the classifier
        thieved_tfc = TensorFlowClassifier(
            clip_values=(0, 1),
            input_ph=input_ph,
            output=logits,
            labels_ph=output_ph,
            train=train,
            loss=loss,
            learning=None,
            sess=sess,
        )

        # Create attack
        copycat_cnn = CopycatCNN(
            classifier=victim_tfc,
            batch_size_query=self.batch_size,
            batch_size_fit=self.batch_size,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
        )
        thieved_tfc = copycat_cnn.extract(x=self.x_train_mnist, thieved_classifier=thieved_tfc)

        victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_mnist[:100]), axis=1)
        thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_mnist[:100]), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)
    finally:
        # Clean-up session and graph even when the extraction or an assertion above fails
        if sess is not None:
            sess.close()
            tf.reset_default_graph()
def test_tensorflow_failure_attack_L0(self):
    """
    Smoke-test CarliniL0Method on a TensorFlow classifier built with ``from_logits=True``.

    Only checks that ``generate`` runs without raising; the output is not inspected.

    :return: None.
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        # Failure attack
        cl0m = CarliniL0Method(
            classifier=tfc,
            targeted=False,
            max_iter=1,
            batch_size=10,
            learning_rate=0.01,
            binary_search_steps=1,
            warm_start=True,
            verbose=False,
        )
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        _ = cl0m.generate(self.x_test_mnist, **params)

        # TODO(review): these output checks are disabled in the original; confirm whether to re-enable.
        # self.assertLessEqual(np.amax(x_test_adv), 1.0)
        # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # self.assertTrue(np.allclose(self.x_test_mnist, x_test_adv, atol=1e-3))
    finally:
        # Clean-up session even when the attack raises
        if sess is not None:
            sess.close()
def test_generate_default(fix_get_mnist_subset, is_tf_version_2):
    """
    Run AutoAttack with its default attack list (``attacks=None``) on the MNIST training subset.

    Does nothing unless ``is_tf_version_2`` is truthy. Asserts on the mean and the maximum absolute
    perturbation of the generated adversarial examples.
    """
    if not is_tf_version_2:
        return

    estimator, _ = get_image_classifier_tf(from_logits=True)

    auto_attack = AutoAttack(
        estimator=estimator,
        norm=np.inf,
        eps=0.3,
        eps_step=0.1,
        attacks=None,
        batch_size=32,
        estimator_orig=None,
    )

    # Only the training split is used; the fixture still has to yield exactly four items.
    x_train, y_train, _x_test, _y_test = fix_get_mnist_subset

    x_train_adv = auto_attack.generate(x=x_train, y=y_train)
    abs_perturbation = np.abs(x_train_adv - x_train)

    assert np.mean(abs_perturbation) == pytest.approx(0.0292, abs=0.105)
    assert np.max(abs_perturbation) == pytest.approx(0.3, abs=0.05)
def test_2_tensorflow_failure_attack(self):
    """
    Test the corner case when the ElasticNet attack fails.

    The targeted attack is configured with zero iterations, zero binary search steps and zero learning
    rate; the output must stay within [0, 1] and match the input to 3 decimals.

    :return: None.
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # Failure attack
        ead = ElasticNet(
            classifier=tfc,
            targeted=True,
            max_iter=0,
            binary_search_steps=0,
            learning_rate=0,
            initial_const=1,
            verbose=False,
        )
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = ead.generate(self.x_test_mnist, **params)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        np.testing.assert_almost_equal(self.x_test_mnist, x_test_adv, 3)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_failure_attack_L2(self):
    """
    Test the corner case when the Carlini L2 attack fails.

    The targeted attack is configured with zero iterations, zero binary search steps and zero learning
    rate; the output must stay within [0, 1] and match the input to 3 decimals, and
    ``self.x_test_mnist`` must be left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        # Failure attack
        cl2m = CarliniL2Method(
            classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0, learning_rate=0, initial_const=1
        )
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = cl2m.generate(self.x_test_mnist, **params)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        np.testing.assert_array_almost_equal(self.x_test_mnist, x_test_adv, decimal=3)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_v2_framework(self):
    """
    Generate an adversarial patch against a TensorFlow classifier and compare with reference values.

    Two reference pixels and the sum over the whole patch are checked against stored numbers.

    :return:
    """
    classifier, _ = get_image_classifier_tf(from_logits=True)

    patch_settings = {
        "rotation_max": 0.5,
        "scale_min": 0.4,
        "scale_max": 0.41,
        "learning_rate": 5.0,
        "batch_size": 10,
        "max_iter": 10,
        "patch_shape": (28, 28, 1),
    }
    attack_ap = AdversarialPatch(classifier, **patch_settings)

    # One zero-valued target entry per training sample
    nb_samples = self.x_train_mnist.shape[0]
    target = np.zeros(nb_samples)
    patch_adv, _ = attack_ap.generate(self.x_train_mnist, target, shuffle=False)

    reference_pixels = (((8, 8), 0.21282613), ((14, 14), 0.5411238))
    for (row, col), expected in reference_pixels:
        self.assertAlmostEqual(patch_adv[row, col, 0], expected, delta=0.05)

    patch_total = float(np.sum(patch_adv))
    self.assertAlmostEqual(patch_total, 378.3399658203125, delta=1.0)
def test_3_tensorflow_mnist_targeted(self):
    """
    Test with the TensorFlowClassifier. (Targeted Attack)

    Delegates the actual checks to ``self._test_attack`` with the targeted flag set to True.

    :return: None.
    """
    classifier, sess = get_image_classifier_tf()

    try:
        self._test_attack(classifier, self.x_test_mnist, self.y_test_mnist, True)
    finally:
        # Clean-up session (was bound but never closed before)
        if sess is not None:
            sess.close()
def test_3_tensorflow_mnist(self):
    """
    Run targeted and untargeted ZOO attacks on a TensorFlow classifier.

    For both attacks the adversarial images must stay within [0, 1]; the targeted attack must also change
    the input. Success rates are only logged, not asserted. Finally checks that ``self.x_test_mnist`` is
    left unmodified and that ``_resize_image`` yields the expected shape.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        # Targeted attack
        zoo = ZooAttack(
            classifier=tfc, targeted=True, max_iter=30, binary_search_steps=8, batch_size=128, verbose=False
        )
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_mnist_adv = zoo.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_mnist_adv).all())
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_mnist_adv), axis=1)
        logger.debug("ZOO target: %s", target)
        logger.debug("ZOO actual: %s", y_pred_adv)
        logger.info("ZOO success rate on MNIST: %.2f", (sum(target == y_pred_adv) / float(len(target))))

        # Untargeted attack
        zoo = ZooAttack(classifier=tfc, targeted=False, max_iter=10, binary_search_steps=3, verbose=False)
        x_test_mnist_adv = zoo.generate(self.x_test_mnist)
        # self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
        y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_mnist_adv), axis=1)
        logger.debug("ZOO actual: %s", y_pred_adv)
        logger.info("ZOO success rate on MNIST: %.2f", (sum(y_pred != y_pred_adv) / float(len(y_pred))))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

        # Check resize
        # NOTE(review): the expected shape hard-codes a batch of one test image - confirm against the fixture.
        x_test_resized = zoo._resize_image(self.x_test_mnist, 64, 64)
        self.assertEqual(x_test_resized.shape, (1, 64, 64, 1))
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_1_tensorflow_classifier(self):
    """
    Apply DefensiveDistillation to a TensorFlow classifier and compare teacher and student predictions.

    The transformed classifier must agree with the trained classifier's predicted labels on
    ``self.x_train_mnist`` in more than half of the cases, and ``cross_entropy`` of the two label arrays
    must lie in [0, 10).

    :return: None.
    """
    # Create the trained classifier
    trained_classifier, sess = get_image_classifier_tf()

    try:
        # Create the modified classifier (shares the trained classifier's session)
        transformed_classifier, _ = get_image_classifier_tf(load_init=False, sess=sess)

        # Create defensive distillation transformer
        transformer = DefensiveDistillation(
            classifier=trained_classifier, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS
        )

        # Perform the transformation
        transformed_classifier = transformer(x=self.x_train_mnist, transformed_classifier=transformed_classifier)

        # Compare the 2 outputs
        preds1 = trained_classifier.predict(x=self.x_train_mnist, batch_size=BATCH_SIZE)
        preds2 = transformed_classifier.predict(x=self.x_train_mnist, batch_size=BATCH_SIZE)

        preds1 = np.argmax(preds1, axis=1)
        preds2 = np.argmax(preds2, axis=1)
        acc = np.sum(preds1 == preds2) / len(preds1)

        self.assertGreater(acc, 0.5)

        # NOTE(review): cross_entropy is fed the argmax labels, not the probability outputs - confirm intent.
        ce = cross_entropy(preds1, preds2)

        self.assertLess(ce, 10)
        self.assertGreaterEqual(ce, 0)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def _image_dl_estimator(one_classifier=False, functional=False, **kwargs):
    """
    Build the image classifier(s) for the framework currently under test.

    :param one_classifier: If True return the classifier itself instead of a one-element list.
    :param functional: If True request a functional-API model. Only the "kerastf" branch provides one;
                       every other framework yields no classifier in that case.
    :param kwargs: Forwarded to the framework-specific factory. A ``wildcard`` entry is consumed here and
                   never forwarded.
    :return: ``(classifier_or_list, sess)``, or ``(None, None)`` when no classifier is available for the
             requested combination. ``sess`` is only set by the "tensorflow" branch.
    """
    sess = None
    classifier_list = None

    # `wildcard` is an option of this helper, not of the factories: always strip it from kwargs
    # (previously an explicit wildcard=None was left in place and forwarded).
    wildcard = kwargs.pop("wildcard", None) is True
    plain_model = (not wildcard) and functional is False

    if framework == "keras":
        # The original nested `if functional:` branch here could never run, because this path already
        # requires `functional is False`; functional Keras requests therefore yield no classifier.
        if plain_model:
            classifier_list = [get_image_classifier_kr(**kwargs)]
    elif framework == "tensorflow":
        if plain_model:
            classifier, sess = get_image_classifier_tf(**kwargs)
            classifier_list = [classifier]
    elif framework == "pytorch":
        if plain_model:
            classifier_list = [get_image_classifier_pt(**kwargs)]
    elif framework == "scikitlearn":
        logging.warning("{0} doesn't have an image classifier defined yet".format(framework))
        classifier_list = None
    elif framework == "kerastf":
        if wildcard:
            classifier_list = [get_image_classifier_kr_tf_with_wildcard(**kwargs)]
        elif functional:
            classifier_list = [get_image_classifier_kr_tf_functional(**kwargs)]
        else:
            classifier_list = [get_image_classifier_kr_tf(**kwargs)]
    elif framework == "mxnet":
        if plain_model:
            classifier_list = [get_image_classifier_mx_instance(**kwargs)]

    if classifier_list is None:
        return None, None

    if one_classifier:
        return classifier_list[0], sess

    return classifier_list, sess
def test_tensorflow_mnist_L2(self):
    """
    Run targeted and untargeted Carlini L2 attacks on a TensorFlow classifier.

    The targeted attack must change the input, stay within [0, 1] and hit the random target class for at
    least one sample. The untargeted attack (batch size 1) must stay within [0, 1] and differ from those
    same random targets for at least one sample. ``self.x_test_mnist`` must be left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        # First attack
        cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=10, verbose=False)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = cl2m.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW2 Target: %s", target)
        logger.debug("CW2 Actual: %s", y_pred_adv)
        logger.info("CW2 Success Rate: %.2f", (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack, no batching
        cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=10, batch_size=1, verbose=False)
        x_test_adv = cl2m.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # `target` is unchanged from the first attack (params was not modified)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW2 Target: %s", target)
        logger.debug("CW2 Actual: %s", y_pred_adv)
        # Untargeted: count mismatches, consistent with the assertion below (was logged with `==`)
        logger.info("CW2 Success Rate: %.2f", (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def _image_dl_estimator(functional=False, **kwargs):
    """
    Build the image classifier for the framework currently under test.

    :param functional: If True request a functional-API model. Only the "pytorch" and "kerastf" branches
                       provide one.
    :param kwargs: Forwarded to the framework-specific factory. A ``wildcard`` entry is consumed here and
                   never forwarded.
    :return: ``(classifier, sess)``; ``sess`` is only set by the TensorFlow branch.
    :raises ARTTestFixtureNotImplemented: If no classifier is available for the requested combination, or
            if the Keras factory raises ``NotImplementedError``.
    """
    sess = None
    classifier = None

    # `wildcard` is an option of this helper, not of the factories: always strip it from kwargs
    # (previously an explicit wildcard=None was left in place and forwarded).
    wildcard = kwargs.pop("wildcard", None) is True
    plain_model = (not wildcard) and functional is False

    if framework == "keras":
        # The original nested `if functional:` branch here could never run, because this path already
        # requires `functional is False`; functional Keras requests fall through to the final raise.
        if plain_model:
            try:
                classifier = get_image_classifier_kr(**kwargs)
            except NotImplementedError as err:
                raise ARTTestFixtureNotImplemented(
                    "This combination of loss function options is currently not supported.",
                    image_dl_estimator.__name__,
                    framework,
                ) from err
    elif framework == "tensorflow1" or framework == "tensorflow2":
        if plain_model:
            classifier, sess = get_image_classifier_tf(**kwargs)
            # Returned directly, without going through the `classifier is None` check below
            return classifier, sess
    elif framework == "pytorch":
        if not wildcard:
            if functional:
                classifier = get_image_classifier_pt_functional(**kwargs)
            else:
                classifier = get_image_classifier_pt(**kwargs)
    elif framework == "kerastf":
        if wildcard:
            classifier = get_image_classifier_kr_tf_with_wildcard(**kwargs)
        elif functional:
            classifier = get_image_classifier_kr_tf_functional(**kwargs)
        else:
            classifier = get_image_classifier_kr_tf(**kwargs)
    elif framework == "mxnet":
        if plain_model:
            classifier = get_image_classifier_mx_instance(**kwargs)

    if classifier is None:
        raise ARTTestFixtureNotImplemented(
            "no test deep learning estimator available", image_dl_estimator.__name__, framework
        )

    return classifier, sess
def test_tensorflow_mnist(self):
    """
    Log the TensorFlow classifier's accuracy on the MNIST train and test sets, then run the shared
    backend checks in ``self._test_backend_mnist`` on the test set.

    :return: None.
    """
    classifier, sess = get_image_classifier_tf(from_logits=False)

    try:
        scores = get_labels_np_array(classifier.predict(self.x_train_mnist))
        acc = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
        logger.info("[TF, MNIST] Accuracy on training set: %.2f%%", (acc * 100))

        scores = get_labels_np_array(classifier.predict(self.x_test_mnist))
        acc = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
        logger.info("[TF, MNIST] Accuracy on test set: %.2f%%", (acc * 100))

        self._test_backend_mnist(classifier, self.x_test_mnist, self.y_test_mnist)
    finally:
        # Clean-up session (was never closed before)
        if sess is not None:
            sess.close()
def test_3_tensorflow_mnist(self):
    """
    Run the DeepFool attack on a TensorFlow classifier built with ``from_logits=True``.

    Clean and adversarial accuracies are only logged. Asserted: the adversarial train/test images differ
    from the originals, the adversarial predictions do not all match the true labels, and
    ``self.x_test_mnist`` is left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Create basic CNN on MNIST using TensorFlow
    classifier, sess = get_image_classifier_tf(from_logits=True)

    try:
        scores = get_labels_np_array(classifier.predict(self.x_train_mnist))
        sum2 = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1))
        accuracy = sum2 / self.y_train_mnist.shape[0]
        logger.info("[TF, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

        scores = get_labels_np_array(classifier.predict(self.x_test_mnist))
        sum3 = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1))
        accuracy = sum3 / self.y_test_mnist.shape[0]
        logger.info("[TF, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

        attack = DeepFool(classifier, max_iter=5, batch_size=11, verbose=False)
        x_train_adv = attack.generate(self.x_train_mnist)
        x_test_adv = attack.generate(self.x_test_mnist)

        self.assertFalse((self.x_train_mnist == x_train_adv).all())
        self.assertFalse((self.x_test_mnist == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((self.y_train_mnist == train_y_pred).all())
        self.assertFalse((self.y_test_mnist == test_y_pred).all())

        sum4 = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1))
        accuracy = sum4 / self.y_train_mnist.shape[0]
        logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

        sum5 = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1))
        accuracy = sum5 / self.y_test_mnist.shape[0]
        logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session (was never closed before)
        if sess is not None:
            sess.close()
def test_2_tensorflow_mnist(self):
    """
    Log the TensorFlow classifier's accuracy on the MNIST splits stored in ``self.mnist``, then run the
    shared backend checks in ``self._test_backend_mnist`` on both splits.

    :return: None.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    classifier, sess = get_image_classifier_tf()

    try:
        scores = get_labels_np_array(classifier.predict(x_train))
        acc = np.sum(np.argmax(scores, axis=1) == np.argmax(y_train, axis=1)) / len(y_train)
        logger.info("[TF, MNIST] Accuracy on training set: %.2f%%", acc * 100)

        scores = get_labels_np_array(classifier.predict(x_test))
        acc = np.sum(np.argmax(scores, axis=1) == np.argmax(np.array(y_test), axis=1)) / len(y_test)
        logger.info("[TF, MNIST] Accuracy on test set: %.2f%%", acc * 100)

        self._test_backend_mnist(classifier, x_train, y_train, x_test, y_test)
    finally:
        # Clean-up session (was never closed before)
        if sess is not None:
            sess.close()
def test_tensorflow_mnist_LInf(self):
    """
    Run targeted and untargeted Carlini LInf attacks on a TensorFlow classifier.

    The targeted attack must change the input, stay within [0, 1] and hit the random target class for at
    least one sample. The untargeted attack must stay within [-1e-6, 1] and differ from those same random
    targets for at least one sample.

    :return: None.
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        # First attack
        clinfm = CarliniLInfMethod(
            classifier=tfc, targeted=True, max_iter=10, initial_const=1, largest_const=1.1, verbose=False
        )
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        # NOTE(review): the log labels say "CW0" although this is the LInf attack - confirm before renaming.
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack, no batching
        clinfm = CarliniLInfMethod(
            classifier=tfc, targeted=False, max_iter=10, initial_const=1, largest_const=1.1, verbose=False
        )
        x_test_adv = clinfm.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        # Lower bound is relaxed by 1e-6 for this attack only
        self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
        # `target` is unchanged from the first attack (params was not modified)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW0 Target: %s", target)
        logger.debug("CW0 Actual: %s", y_pred_adv)
        logger.info("CW0 Success Rate: %.2f", (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def _image_dl_estimator_defended(one_classifier=False, **kwargs):
    """
    Build an image classifier for the current framework with preprocessing defences attached.

    :param one_classifier: Unused; kept for interface compatibility.
    :param kwargs: Forwarded to the framework-specific factory. A ``defenses`` entry is consumed here:
                   a collection of names among "FeatureSqueezing", "JpegCompression" and
                   "SpatialSmoothing". When absent or None, FeatureSqueezing alone is used.
    :return: ``(classifier, sess)`` where ``sess`` is always None.
    :raises ARTTestFixtureNotImplemented: If the current framework has no defended image estimator.
    """
    sess = None
    classifier = None

    clip_values = (0, 1)
    fs = FeatureSqueezing(bit_depth=2, clip_values=clip_values)

    # Always strip `defenses` from kwargs so it never reaches the factories
    # (previously an explicit defenses=None was left in place and forwarded).
    requested = kwargs.pop("defenses", None)

    defenses = []
    if requested is None:
        defenses.append(fs)
    else:
        if "FeatureSqueezing" in requested:
            defenses.append(fs)
        if "JpegCompression" in requested:
            defenses.append(JpegCompression(clip_values=clip_values, apply_predict=True))
        if "SpatialSmoothing" in requested:
            defenses.append(SpatialSmoothing())

    if framework == "tensorflow2":
        classifier, _ = get_image_classifier_tf(**kwargs)
    elif framework == "keras":
        classifier = get_image_classifier_kr(**kwargs)
    elif framework == "kerastf":
        classifier = get_image_classifier_kr_tf(**kwargs)
    elif framework == "pytorch":
        classifier = get_image_classifier_pt(**kwargs)

    # Fail with the fixture exception *before* touching the classifier: reading
    # `classifier.channels_first` on None used to raise AttributeError for unsupported frameworks.
    if classifier is None:
        raise ARTTestFixtureNotImplemented(
            "no defended image estimator", image_dl_estimator_defended.__name__, framework, {"defenses": defenses}
        )

    # Align the channel ordering of defences that expose it with the classifier's
    for defense in defenses:
        if "channels_first" in defense.params:
            defense.channels_first = classifier.channels_first

    classifier.set_params(preprocessing_defences=defenses)

    return classifier, sess
def test_tensorflow(self):
    """
    Generate an adversarial patch (500 iterations) against a TensorFlow classifier and compare two
    reference pixels and the patch sum with stored values.

    :return: None.
    """
    tfc, sess = get_image_classifier_tf()

    try:
        attack_ap = AdversarialPatch(
            tfc, rotation_max=22.5, scale_min=0.1, scale_max=1.0, learning_rate=5.0, batch_size=10, max_iter=500
        )
        patch_adv, _ = attack_ap.generate(self.x_train_mnist)

        self.assertAlmostEqual(patch_adv[8, 8, 0], -3.1106631027725005, delta=0.4)
        self.assertAlmostEqual(patch_adv[14, 14, 0], 18.101, delta=0.2)
        self.assertAlmostEqual(float(np.sum(patch_adv)), 624.867, delta=70.0)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_failure_attack_LInf(self):
    """
    Test the corner case when the Carlini LInf attack fails.

    The targeted attack is configured with zero iterations and zero learning rate; the output must stay
    within [0, 1] and be close to the input (absolute tolerance 1e-3).

    :return: None.
    """
    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        # Failure attack
        clinfm = CarliniLInfMethod(classifier=tfc, targeted=True, max_iter=0, learning_rate=0, eps=0.5)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = clinfm.generate(self.x_test_mnist, **params)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        self.assertTrue(np.allclose(self.x_test_mnist, x_test_adv, atol=1e-3))
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def _get_image_classifier_list(one_classifier=False, **kwargs):
    """
    Build the image classifier(s) for the framework currently under test.

    :param one_classifier: If True return the classifier itself instead of a one-element list.
    :param kwargs: Forwarded to the Keras and TensorFlow factories.
    :return: ``(classifier_or_list, sess)``, or ``(None, None)`` when the framework has no image
             classifier. ``sess`` is only set by the "tensorflow" branch.
    """
    sess = None
    # Initialised up front: any framework outside the branches below used to hit an
    # UnboundLocalError at the `is None` check instead of returning (None, None).
    classifier_list = None

    if framework == "keras":
        classifier_list = [get_image_classifier_kr(**kwargs)]
    elif framework == "tensorflow":
        classifier, sess = get_image_classifier_tf(**kwargs)
        classifier_list = [classifier]
    elif framework == "pytorch":
        # NOTE(review): kwargs are not forwarded to the PyTorch factory - confirm this is intended.
        classifier_list = [get_image_classifier_pt()]
    elif framework == "scikitlearn":
        logging.warning("{0} doesn't have an image classifier defined yet".format(framework))
        classifier_list = None

    if classifier_list is None:
        return None, None

    if one_classifier:
        return classifier_list[0], sess

    return classifier_list, sess
def test_tensorflow(self):
    """
    Generate an adversarial patch (5 iterations, 28x28x1) against a TensorFlow classifier and compare
    two reference pixels and the patch sum with stored values that depend on the TensorFlow major version.

    :return: None.
    """
    import tensorflow as tf

    tfc, sess = get_image_classifier_tf()

    try:
        attack_ap = AdversarialPatch(
            tfc,
            rotation_max=0.5,
            scale_min=0.4,
            scale_max=0.41,
            learning_rate=5.0,
            batch_size=10,
            max_iter=5,
            patch_shape=(28, 28, 1),
        )
        target = np.zeros(self.x_train_mnist.shape[0])
        patch_adv, _ = attack_ap.generate(self.x_train_mnist, target, shuffle=False)

        # Reference values differ between TensorFlow 2 and earlier versions
        if tf.__version__[0] == "2":
            self.assertAlmostEqual(patch_adv[8, 8, 0], 0.55935985, delta=0.05)
            self.assertAlmostEqual(patch_adv[14, 14, 0], 0.5917497, delta=0.05)
            self.assertAlmostEqual(float(np.sum(patch_adv)), 400.0701904296875, delta=1.0)
        else:
            self.assertAlmostEqual(patch_adv[8, 8, 0], 0.5332792, delta=0.05)
            self.assertAlmostEqual(patch_adv[14, 14, 0], 0.54590017, delta=0.05)
            self.assertAlmostEqual(float(np.sum(patch_adv)), 398.8515625, delta=1.0)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_numpy(self):
    """
    Generate an adversarial patch with AdversarialPatchNumpy against a TensorFlow classifier and compare
    two reference pixels and the patch sum with stored values.

    :return: None.
    """
    tfc, sess = get_image_classifier_tf(from_logits=True)

    try:
        attack_ap = AdversarialPatchNumpy(
            tfc,
            rotation_max=0.5,
            scale_min=0.4,
            scale_max=0.41,
            learning_rate=5.0,
            batch_size=10,
            max_iter=5,
        )
        target = np.zeros(self.x_train_mnist.shape[0])
        patch_adv, _ = attack_ap.generate(self.x_train_mnist, target, shuffle=False)

        # The original branched on the TensorFlow major version, but both branches asserted exactly
        # these values, so the branch (and the tensorflow import it needed) was redundant.
        self.assertAlmostEqual(patch_adv[8, 8, 0], 0.67151666, delta=0.05)
        self.assertAlmostEqual(patch_adv[14, 14, 0], 0.6292826, delta=0.05)
        self.assertAlmostEqual(float(np.sum(patch_adv)), 424.31439208984375, delta=1.0)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()
def test_3_tensorflow_mnist(self):
    """
    Run HopSkipJump on a TensorFlow classifier for every combination of targeted/untargeted and
    default-norm/infinity-norm, in the order: targeted default, targeted inf, untargeted default,
    untargeted inf.

    For each combination: the adversarial images must differ from the input and stay within
    [-0.0001, 1.0001]; a targeted attack must hit its random target for at least one sample, an
    untargeted one must change at least one prediction. Two random binary masks (one per sample and one
    shared across samples) are then applied: entries where the mask is 0 must stay unchanged and at least
    one entry where it is 1 must change. Finally ``self.x_test_mnist`` must be left unmodified.

    :return: None.
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    try:
        for targeted in (True, False):
            # An empty dict keeps the attack's default norm; np.inf replaces the `np.Inf` alias,
            # which was removed in NumPy 2.0.
            for norm_kwargs in ({}, {"norm": np.inf}):
                hsj = HopSkipJump(
                    classifier=tfc, targeted=targeted, max_iter=20, max_eval=100, init_eval=10, **norm_kwargs
                )

                # Targeted attacks get fresh random targets; untargeted ones run without labels
                generate_kwargs = {}
                if targeted:
                    generate_kwargs["y"] = random_targets(self.y_test_mnist, tfc.nb_classes)

                x_test_adv = hsj.generate(self.x_test_mnist, **generate_kwargs)

                self.assertFalse((self.x_test_mnist == x_test_adv).all())
                self.assertTrue((x_test_adv <= 1.0001).all())
                self.assertTrue((x_test_adv >= -0.0001).all())

                if targeted:
                    target = np.argmax(generate_kwargs["y"], axis=1)
                    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
                    self.assertTrue((target == y_pred_adv).any())
                else:
                    y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
                    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
                    self.assertTrue((y_pred != y_pred_adv).any())

                # Test the masking: first a mask with the full data shape, then one with a single
                # sample's shape (broadcast over the batch in the checks below)
                for mask_shape in (self.x_test_mnist.shape, self.x_test_mnist.shape[1:]):
                    mask = np.random.binomial(n=1, p=0.5, size=np.prod(mask_shape))
                    mask = mask.reshape(mask_shape)

                    x_test_adv = hsj.generate(self.x_test_mnist, mask=mask, **generate_kwargs)

                    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
                    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)

                    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
                    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
    finally:
        # Clean-up session even when an assertion above fails
        if sess is not None:
            sess.close()