def test_defences_predict(self):
    """Predictions with defences attached to the classifier must match applying the same defences by hand."""
    clip_values = (0, 1)
    fs = FeatureSqueezing(clip_values=clip_values, bit_depth=2)
    jpeg = JpegCompression(clip_values=clip_values, apply_predict=True)
    smooth = SpatialSmoothing()

    classifier_ = get_image_classifier_kr_tf()
    classifier = KerasClassifier(
        clip_values=clip_values, model=classifier_._model, preprocessing_defences=[fs, jpeg, smooth]
    )
    self.assertEqual(len(classifier.preprocessing_defences), 3)

    predictions_classifier = classifier.predict(self.x_test_mnist)

    # Apply the same defences by hand, in the same order
    x_defended = self.x_test_mnist
    for defence in (fs, jpeg, smooth):
        x_defended, _ = defence(x_defended, self.y_test_mnist)

    classifier = get_image_classifier_kr_tf()
    predictions_check = classifier._model.predict(x_defended)

    # Check that the prediction results match
    np.testing.assert_array_almost_equal(predictions_classifier, predictions_check, decimal=4)
def test_with_defences(self):
    """FGSM via query-efficient gradient estimation still finds adversarial examples with a defence attached."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Wrap the trained Keras model with a FeatureSqueezing defence
    model = self.classifier_k._model
    fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    classifier = KerasClassifier(model=model, clip_values=(0, 1), preprocessing_defences=fs)

    # Estimate gradients from a limited number of queries
    classifier = QueryEfficientGradientEstimationClassifier(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Adversarial samples must differ from the originals ...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ... and must flip at least one predicted label
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())
def test_7_keras_iris_unbounded(self):
    """UniversalPerturbation (NewtonFool attacker) on an Iris classifier without clip values."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(
        max_iter=1,
        attacker="newtonfool",
        attacker_params={"max_iter": 5, "verbose": False},
    )
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (acc * 100))
def test_black_box_keras_loss(art_warning, get_iris_dataset):
    """Membership inference with input_type="loss" for both a string-based and a class-based Keras loss."""
    try:
        (x_train, y_train), (_, _) = get_iris_dataset

        # This test creates a framework-specific (keras) model because it needs to check both the case of a
        # string-based loss and a class-based loss, and therefore cannot use the generic fixture
        # get_tabular_classifier_list
        def _build_and_fit(hidden_units, loss):
            # One-line helper: build, compile and fit a small dense network on Iris
            net = keras.models.Sequential()
            net.add(keras.layers.Dense(hidden_units, input_dim=4, activation="relu"))
            net.add(keras.layers.Dense(3, activation="softmax"))
            net.compile(loss=loss, optimizer="adam", metrics=["accuracy"])
            net.fit(x_train, y_train, epochs=150, batch_size=10)
            return net

        # First: string-based loss; second: class-based loss with a wider hidden layer
        cases = (
            (8, "categorical_crossentropy"),
            (12, keras.losses.CategoricalCrossentropy()),
        )
        for hidden_units, loss in cases:
            classifier = KerasClassifier(_build_and_fit(hidden_units, loss))
            attack = MembershipInferenceBlackBox(classifier, input_type="loss")
            backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.15)
    except ARTTestException as e:
        art_warning(e)
def test_7_keras_iris_unbounded(self):
    """HopSkipJump on an Iris classifier without clip values, for norm=2 and norm=inf."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)

    # Norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

    # Norm=np.inf
    # Fix: np.Inf is a deprecated alias removed in NumPy 2.0; np.inf is the canonical spelling.
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

    # Clean-up session
    k.clear_session()
def test_iris_unbounded(self):
    """FGSM through ExpectationOverTransformations (identity transform) on an unbounded Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Identity transformation: EOT sampling must not alter the inputs here
    def identity(x):
        return x

    def transformation():
        while True:
            yield identity

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    classifier = ExpectationOverTransformations(classifier, sample_size=1, transformation=transformation)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    # Without clip values the attack may push features outside [0, 1]
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (acc * 100))
def test_defences_predict(get_default_mnist_subset, get_image_classifier_list):
    """Predictions with defences attached must equal predictions on manually defended data."""
    (x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = get_default_mnist_subset

    clip_values = (0, 1)
    fs = FeatureSqueezing(clip_values=clip_values, bit_depth=2)
    jpeg = JpegCompression(clip_values=clip_values, apply_predict=True)
    smooth = SpatialSmoothing()

    classifier_, _ = get_image_classifier_list(one_classifier=True)
    classifier = KerasClassifier(
        clip_values=clip_values, model=classifier_.model, preprocessing_defences=[fs, jpeg, smooth]
    )
    assert len(classifier.preprocessing_defences) == 3

    predictions_classifier = classifier.predict(x_test_mnist)

    # Apply the same defences by hand, in the same order
    x_defended = x_test_mnist
    for defence in (fs, jpeg, smooth):
        x_defended, _ = defence(x_defended, y_test_mnist)

    classifier, _ = get_image_classifier_list(one_classifier=True)
    predictions_check = classifier.model.predict(x_defended)

    # Check that the prediction results match
    np.testing.assert_array_almost_equal(predictions_classifier, predictions_check, decimal=4)
def test_keras_iris_unbounded(self):
    """NewtonFool on an Iris classifier without clip values."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = NewtonFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (acc * 100))
def test_keras_iris_unbounded_LInf(self):
    """Carlini & Wagner L-inf attack on an Iris classifier without clip values."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=1)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == predictions_adv).all())
    accuracy = np.sum(predictions_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_classifier(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    # Build KerasClassifier to be stolen from
    victim_krc = get_image_classifier_kr()

    # Create simple CNN used as the thieved (substitute) model
    model = Sequential()
    model.add(Conv2D(1, kernel_size=(7, 7), activation="relu", input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(4, 4)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    loss = keras.losses.categorical_crossentropy
    model.compile(loss=loss, optimizer=keras.optimizers.Adam(lr=0.001), metrics=["accuracy"])
    thieved_krc = KerasClassifier(model, clip_values=(0, 1), use_logits=False)

    # Extract the victim with CopycatCNN
    copycat_cnn = CopycatCNN(
        classifier=victim_krc,
        batch_size_fit=self.batch_size,
        batch_size_query=self.batch_size,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
    )
    thieved_krc = copycat_cnn.extract(x=self.x_train_mnist, thieved_classifier=thieved_krc)

    # The stolen model should agree with the victim on a reasonable fraction of samples
    victim_preds = np.argmax(victim_krc.predict(x=self.x_train_mnist[:100]), axis=1)
    thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_mnist[:100]), axis=1)
    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.3)

    # Clean-up
    k.clear_session()
def test_functional_model(get_functional_model):
    """Selecting input/output layers by index on a multi-input/output functional model."""
    functional_model = get_functional_model

    # Index 1 wires the classifier to the second input/output pair
    keras_model = KerasClassifier(functional_model, clip_values=(0, 1), input_layer=1, output_layer=1)
    assert keras_model._input.name == "input1:0"
    assert keras_model._output.name == "output1/Softmax:0"

    # Index 0 wires it to the first pair
    keras_model = KerasClassifier(functional_model, clip_values=(0, 1), input_layer=0, output_layer=0)
    assert keras_model._input.name == "input0:0"
    assert keras_model._output.name == "output0/Softmax:0"
def test_7_keras_iris_unbounded(self):
    """VirtualAdversarialMethod on an Iris classifier without clip values."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = VirtualAdversarialMethod(classifier, eps=1, verbose=False)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
    # Without clip values the perturbation may leave [0, 1]
    self.assertTrue((x_test_iris_adv > 1).any())
    self.assertTrue((x_test_iris_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with VAT adversarial examples: %.2f%%", (acc * 100))
def test_functional_model(self):
    """Selecting input/output layers by index on a multi-input/output functional model."""
    # Need to update the functional_model code to produce a model with more than one input and output layers...
    keras_model = KerasClassifier(self.functional_model, clip_values=(0, 1), input_layer=1, output_layer=1)
    # Bug fix: the original used assertTrue(x, "input1"), which treats the second argument as a
    # failure *message* and never compares anything, so the checks always passed. Use assertIn
    # to actually verify the selected layer names (TF appends a ":0" tensor suffix, so a
    # substring check is the robust comparison).
    self.assertIn("input1", keras_model._input.name)
    self.assertIn("output1", keras_model._output.name)

    keras_model = KerasClassifier(self.functional_model, clip_values=(0, 1), input_layer=0, output_layer=0)
    self.assertIn("input0", keras_model._input.name)
    self.assertIn("output0", keras_model._output.name)
def test_resnet(create_test_image):
    """Pretrained ResNet50 wrapped in KerasClassifier predicts the expected ImageNet label."""
    image_file_path = create_test_image
    keras.backend.set_learning_phase(0)
    model = ResNet50(weights="imagenet")
    classifier = KerasClassifier(model, clip_values=(0, 255))

    # Load the test image and add a leading batch axis
    image = img_to_array(load_img(image_file_path, target_size=(224, 224)))
    image = image.reshape((1,) + image.shape)

    prediction = classifier.predict(image)
    label = decode_predictions(prediction)[0][0]

    assert label[1] == "Weimaraner"
    np.testing.assert_array_almost_equal(prediction[0, 178], 0.2658045, decimal=3)
def test_pickle(self):
    """A KerasClassifier with a preprocessing defence survives a pickle round-trip."""
    full_path = os.path.join(ART_DATA_PATH, "my_classifier.p")
    folder = os.path.split(full_path)[0]
    if not os.path.exists(folder):
        os.makedirs(folder)

    fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    keras_model = KerasClassifier(
        self.functional_model, clip_values=(0, 1), input_layer=1, output_layer=1, preprocessing_defences=fs
    )
    with open(full_path, "wb") as save_file:
        pickle.dump(keras_model, save_file)

    # Unpickle:
    with open(full_path, "rb") as load_file:
        loaded = pickle.load(load_file)

    # All relevant classifier state must be restored
    np.testing.assert_equal(keras_model._clip_values, loaded._clip_values)
    self.assertEqual(keras_model._channels_first, loaded._channels_first)
    self.assertEqual(keras_model._use_logits, loaded._use_logits)
    self.assertEqual(keras_model._input_layer, loaded._input_layer)
    self.assertEqual(self.functional_model.get_config(), loaded._model.get_config())
    self.assertTrue(isinstance(loaded.preprocessing_defences[0], FeatureSqueezing))

    os.remove(full_path)
def test_7_keras_iris_unbounded(self):
    """Projected Gradient Descent on an Iris classifier without clip values."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2, max_iter=5)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((np.array(x_test) == x_test_adv).all())
    # Without clip values the attack may leave [0, 1]
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(np.array(y_test), axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / len(y_test)
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (acc * 100))
def _cnn_mnist_k(input_shape):
    """Build a small Keras CNN for the given input shape and wrap it in a KerasClassifier."""
    import tensorflow as tf

    # TF >= 2.3 requires disabling eager execution for the v1-style Keras wrapper;
    # pick the matching keras import path per TF version.
    tf_version = [int(v) for v in tf.__version__.split(".")]
    if tf_version[0] == 2 and tf_version[1] >= 3:
        tf.compat.v1.disable_eager_execution()
        from tensorflow import keras
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
    else:
        import keras
        from keras.models import Sequential
        from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    model.compile(
        loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"]
    )

    return KerasClassifier(model=model, clip_values=(0, 1), use_logits=False)
def _create_krclassifier():
    """
    To create a simple KerasClassifier for testing.
    :return:
    """
    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation="relu", input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    model.compile(
        loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"]
    )

    # Get the classifier
    return KerasClassifier(model=model, clip_values=(0, 1), use_logits=False)
def test_iris_unbounded(self):
    """Query-efficient FGSM on an Iris classifier without clip values."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, then wrap with query-efficient gradient estimation
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    classifier = QueryEfficientGradientEstimationClassifier(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    # Without clip values the attack may leave [0, 1]
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
def test_keras_iris(self):
    """
    Second test for Keras.
    :return:
    """
    # Build KerasClassifier to be stolen from
    victim_krc = get_tabular_classifier_kr()

    # Create a simple dense network as the thieved (substitute) model
    model = Sequential()
    model.add(Dense(10, input_shape=(4,), activation="relu"))
    model.add(Dense(10, activation="relu"))
    model.add(Dense(3, activation="softmax"))
    model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(lr=0.001), metrics=["accuracy"])
    thieved_krc = KerasClassifier(model, clip_values=(0, 1), use_logits=False, channels_first=True)

    # Extract the victim with CopycatCNN
    copycat_cnn = CopycatCNN(
        classifier=victim_krc,
        batch_size_fit=self.batch_size,
        batch_size_query=self.batch_size,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
    )
    thieved_krc = copycat_cnn.extract(x=self.x_train_iris, thieved_classifier=thieved_krc)

    # The stolen model should agree with the victim on a reasonable fraction of samples
    victim_preds = np.argmax(victim_krc.predict(x=self.x_train_iris[:100]), axis=1)
    thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_iris[:100]), axis=1)
    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.3)

    # Clean-up
    k.clear_session()
def setUpClass(cls):
    """Load MNIST, train a small Keras CNN, and build ActivationDefence fixtures for the class."""
    # Load MNIST and keep only the first NB_TRAIN training samples
    (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    cls.mnist = (x_train, y_train), (x_test, y_test), (min_, max_)

    # Create simple keras model
    import tensorflow as tf

    # TF >= 2.3 needs eager execution disabled and the tf.keras import path;
    # older setups use standalone keras
    tf_version = [int(v) for v in tf.__version__.split(".")]
    if tf_version[0] == 2 and tf_version[1] >= 3:
        tf.compat.v1.disable_eager_execution()
        from tensorflow.keras import backend as k
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
    else:
        import keras.backend as k
        from keras.models import Sequential
        from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

    k.set_learning_phase(1)
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=x_train.shape[1:]))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    from art.estimators.classification.keras import KerasClassifier

    # Wrap, train briefly, and build the defence directly from arrays
    cls.classifier = KerasClassifier(model=model, clip_values=(min_, max_))
    cls.classifier.fit(x_train, y_train, nb_epochs=1, batch_size=128)
    cls.defence = ActivationDefence(cls.classifier, x_train, y_train)

    # Second defence instance fed through a data generator instead of raw arrays
    datagen = ImageDataGenerator()
    datagen.fit(x_train)
    data_gen = KerasDataGenerator(
        datagen.flow(x_train, y_train, batch_size=NB_TRAIN), size=NB_TRAIN, batch_size=NB_TRAIN
    )
    cls.defence_gen = ActivationDefence(cls.classifier, None, None, generator=data_gen)
def setUpClass(cls):
    """Load a pre-trained model and run FunctionallyEquivalentExtraction once for the whole class."""
    master_seed(seed=1234, set_tensorflow=True)
    super().setUpClass()

    # Keep small train/test subsets for speed
    cls.n_train = 100
    cls.n_test = 11
    cls.x_train_mnist = cls.x_train_mnist[0:cls.n_train]
    cls.y_train_mnist = cls.y_train_mnist[0:cls.n_train]
    cls.x_test_mnist = cls.x_test_mnist[0:cls.n_test]
    cls.y_test_mnist = cls.y_test_mnist[0:cls.n_test]

    # Load the reference model shipped with the test data
    model = load_model(
        join(
            *[
                dirname(dirname(dirname(__file__))),
                "utils",
                "data",
                "test_models",
                "model_test_functionally_equivalent_extraction.h5",
            ]
        )
    )
    np.random.seed(0)
    num_neurons = 16
    img_rows = 28
    img_cols = 28
    num_channels = 1

    # Flatten images to vectors in float64
    x_train = cls.x_train_mnist.reshape(cls.n_train, img_rows, img_cols, num_channels)
    x_test = cls.x_test_mnist.reshape(cls.n_test, img_rows, img_cols, num_channels)
    x_train = x_train.reshape((x_train.shape[0], num_channels * img_rows * img_cols)).astype("float64")
    x_test = x_test.reshape((x_test.shape[0], num_channels * img_rows * img_cols)).astype("float64")

    # Standardize the training data with its own mean/std.
    # NOTE(review): x_test is NOT standardized here while x_train is — looks asymmetric; confirm
    # whether the extracted model is meant to receive raw-scale test inputs.
    mean = np.mean(x_train)
    std = np.std(x_train)
    x_train = (x_train - mean) / std

    classifier = KerasClassifier(model=model, use_logits=True, clip_values=(0, 1))
    cls.fee = FunctionallyEquivalentExtraction(classifier=classifier, num_neurons=num_neurons)
    # Run extraction on a 10-sample slice; the result is a predictable classifier
    fee_extracted = cls.fee.extract(
        x_train[0:10], fraction_true=0.1, delta_0=6, delta_value_max=1, d2_min=0.00000000000000000001, ftol=0.01
    )
    fee_extracted.predict(x=x_test)
def test_8_keras_iris_vector_unbounded(self):
    """Saliency Map Method on an Iris classifier without clip values must perturb the inputs."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = SaliencyMapMethod(classifier, theta=1, verbose=False)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
def _predict_classifier(
    self, x: np.ndarray, batch_size: int = 128, training_mode: bool = False, **kwargs
) -> np.ndarray:
    """Cast the input to ART's numpy dtype, then delegate prediction to KerasClassifier."""
    x_cast = x.astype(ART_NUMPY_DTYPE)
    return KerasClassifier.predict(self, x=x_cast, batch_size=batch_size, training_mode=training_mode, **kwargs)
def test_binary_keras_instantiation_and_attack_pgd(art_warning):
    """PGD against a binary (single sigmoid output) Keras classifier changes some predictions."""
    tf.compat.v1.disable_eager_execution()
    try:
        # Synthetic binary classification data
        x, y = sklearn.datasets.make_classification(
            n_samples=10000, n_features=20, n_informative=5, n_redundant=2, n_repeated=0, n_classes=2
        )
        train_x, test_x, train_y, test_y = sklearn.model_selection.train_test_split(x, y, test_size=0.2)
        train_x = train_x.astype(np.float32)
        test_x = test_x.astype(np.float32)

        # Small dense network with a single sigmoid output
        model = tf.keras.models.Sequential(
            [
                tf.keras.layers.Dense(128, activation=tf.nn.relu, input_shape=(20,)),
                tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
            ]
        )
        model.summary()
        model.compile(optimizer=tf.optimizers.Adam(), loss="binary_crossentropy", metrics=["accuracy"])

        classifier = KerasClassifier(model=model)
        classifier.fit(train_x, train_y, nb_epochs=5)
        pred = classifier.predict(test_x)

        # PGD must change at least one prediction
        attack = ProjectedGradientDescent(estimator=classifier, eps=0.5)
        x_test_adv = attack.generate(x=test_x)
        adv_predictions = classifier.predict(x_test_adv)
        assert (adv_predictions != pred).any()
    except ARTTestException as e:
        art_warning(e)
def test_binary_activation_detector(self): """ Test the binary activation detector end-to-end. :return: """ # Get MNIST (x_train, y_train), (x_test, y_test), _, _ = load_mnist() x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN] x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST] # Keras classifier classifier = get_image_classifier_kr() # Generate adversarial samples: attacker = FastGradientMethod(classifier, eps=0.1) x_train_adv = attacker.generate(x_train[:NB_TRAIN]) x_test_adv = attacker.generate(x_test[:NB_TRAIN]) # Compile training data for detector: x_train_detector = np.concatenate((x_train[:NB_TRAIN], x_train_adv), axis=0) y_train_detector = np.concatenate((np.array([[1, 0]] * NB_TRAIN), np.array([[0, 1]] * NB_TRAIN)), axis=0) # Create a simple CNN for the detector activation_shape = classifier.get_activations(x_test[:1], 0, batch_size=128).shape[1:] number_outputs = 2 model = Sequential() model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape)) model.add(Flatten()) model.add(Dense(number_outputs, activation="softmax")) model.compile( loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"] ) # Create detector and train it. 
# Detector consider activations at layer=0: detector = BinaryActivationDetector( classifier=classifier, detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False), layer=0 ) detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128) # Apply detector on clean and adversarial test data: test_detection = np.argmax(detector.predict(x_test), axis=1) test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1) # Assert there is at least one true positive and negative nb_true_positives = len(np.where(test_adv_detection == 1)[0]) nb_true_negatives = len(np.where(test_detection == 0)[0]) logger.debug("Number of true positives detected: %i", nb_true_positives) logger.debug("Number of true negatives detected: %i", nb_true_negatives) self.assertGreater(nb_true_positives, 0) self.assertGreater(nb_true_negatives, 0)
def test_iris_unbounded(self):
    """FGSM via black-box query-efficient gradient estimation on an unbounded Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, then wrap for black-box gradient estimation
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    # Without clip values the attack may leave [0, 1]
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (acc * 100))
def _cnn_mnist_k(input_shape):
    """Build a small Keras CNN for the given input shape and wrap it in a KerasClassifier."""
    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    model.compile(
        loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"]
    )

    return KerasClassifier(model=model, clip_values=(0, 1), use_logits=False)
def test_with_defences(self):
    """FGSM with FeatureSqueezing and black-box query-efficient gradient estimation."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Get the ready-trained Keras model and attach the defence
    model = self.classifier_k._model
    fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    classifier = KerasClassifier(model=model, clip_values=(0, 1), preprocessing_defences=fs)

    # Wrap the classifier for black-box gradient estimation
    classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Adversarial samples must differ from the originals ...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ... and must flip at least one predicted label
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    # Log accuracy on the adversarial train set
    preds = classifier.predict(x_train_adv)
    acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info(
        "Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%", (acc * 100)
    )

    # Log accuracy on the adversarial test set
    preds = classifier.predict(x_test_adv)
    acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info(
        "Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%", (acc * 100)
    )
def test_keras_iris_unbounded(self):
    """ElasticNet (EAD) on an Iris classifier without clip values reproduces pinned outputs."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = ElasticNet(classifier, targeted=False, max_iter=10)
    x_test_adv = attack.generate(self.x_test_iris)

    # First adversarial sample is pinned to known values
    expected_x_test_adv = np.asarray([0.85931635, 0.44633555, 0.65658355, 0.23840423])
    np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)

    # Predicted labels on the adversarial set are pinned as well
    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    expected_predictions = np.asarray(
        [
            1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1,
            1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
            2, 1, 2, 1, 0, 1, 1, 1, 2, 0, 2, 2, 1, 1, 2,
        ]
    )
    np.testing.assert_array_equal(predictions_adv, expected_predictions)

    accuracy = 1.0 - np.sum(predictions_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("EAD success rate on Iris: %.2f%%", (accuracy * 100))