def setUp(self):
    """Prepare a freshly trained classifier and its ActivationDefence.

    Loads MNIST through ``load_dataset``, keeps the first 300 training
    samples, fits a small Keras CNN on them for one epoch and stores the
    wrapped classifier and the defence on the test instance.
    """
    (all_images, all_labels), (_, _), min_, max_ = load_dataset(str('mnist'))
    self.x_train = all_images[:300]
    self.y_train = all_labels[:300]

    # The Keras learning-phase flag is set to 1 before the model is built.
    k.set_learning_phase(1)

    # Layers are created in network order and then appended one by one.
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=self.x_train.shape[1:]),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    self.classifier = KerasClassifier((min_, max_), model=model)
    self.classifier.fit(self.x_train, self.y_train, nb_epochs=1, batch_size=128)

    self.defence = ActivationDefence(self.classifier, self.x_train, self.y_train)
def test_fix_relabel_poison(self):
    # Exercises both relabelling strategies of ActivationDefence on the first
    # 100 MNIST training samples stored in self.mnist.
    (x_train, y_train), (_, _), (_, _) = self.mnist
    x_poison, y_fix = x_train[:100], y_train[:100]

    # Same split fraction that is handed to relabel_poison_ground_truth below;
    # everything from index n_train onwards is used as the held-out part here.
    test_set_split = 0.7
    n_train = int(len(x_poison) * test_set_split)
    x_test, y_test = x_poison[n_train:], y_fix[n_train:]

    def miss_rate(classifier):
        # Fraction of held-out samples whose arg-max prediction is wrong.
        predicted = np.argmax(classifier.predict(x_test), axis=1)
        return 1 - np.sum(predicted == np.argmax(y_test, axis=1)) / y_test.shape[0]

    ini_miss = miss_rate(self.classifier)

    improvement, new_classifier = ActivationDefence.relabel_poison_ground_truth(
        self.classifier,
        x_poison,
        y_fix,
        test_set_split=test_set_split,
        tolerable_backdoor=0.01,
        max_epochs=5,
        batch_epochs=10)

    final_miss = miss_rate(new_classifier)
    self.assertEqual(improvement, ini_miss - final_miss)

    # Other method (since it's cross validation we can't assert to a concrete number).
    improvement, _ = ActivationDefence.relabel_poison_cross_validation(
        self.classifier,
        x_poison,
        y_fix,
        n_splits=2,
        tolerable_backdoor=0.01,
        max_epochs=5,
        batch_epochs=10)
    self.assertGreaterEqual(improvement, 0)
def test_pickle(self):
    # Round-trips the classifier through ActivationDefence's pickle helpers and
    # checks that the wrapper settings compared below survive the round trip.
    filename = 'test_pickle.h5'
    ActivationDefence._pickle_classifier(self.classifier, filename)

    # Cleanup lives in ``finally`` so a failing assertion (or a failing
    # unpickle) does not leave the pickle file behind for later test runs.
    try:
        loaded = ActivationDefence._unpickle_classifier(filename)

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        for attribute in ('_clip_values', '_channel_index', '_use_logits', '_input_layer'):
            self.assertEqual(
                getattr(self.classifier, attribute),
                getattr(loaded, attribute),
                msg='%s changed after pickle round trip' % attribute)
    finally:
        ActivationDefence._remove_pickle(filename)
def setUpClass(cls):
    """Build the shared MNIST fixture, classifier and defence for the class.

    Stores ``cls.mnist`` as ``(train, test, (min_, max_))`` with the training
    split truncated to ``NB_TRAIN`` samples, trains a small Keras CNN on it
    for one epoch and wraps it in an ``ActivationDefence``.
    """
    (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()
    x_train = x_train[:NB_TRAIN]
    y_train = y_train[:NB_TRAIN]
    cls.mnist = (x_train, y_train), (x_test, y_test), (min_, max_)

    # Create simple keras model
    import keras.backend as k
    from keras.models import Sequential
    from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

    k.set_learning_phase(1)

    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]),
        MaxPooling2D(pool_size=(3, 3)),
        Flatten(),
        Dense(10, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    from art.classifiers import KerasClassifier

    cls.classifier = KerasClassifier((min_, max_), model=model)
    cls.classifier.fit(x_train, y_train, nb_epochs=1, batch_size=128)

    cls.defence = ActivationDefence(cls.classifier, x_train, y_train)
def setUpClass(cls):
    """Build the shared MNIST fixture, classifier and defence for the class.

    Stores ``cls.mnist`` as ``(train, test)`` with the training split
    truncated to ``NB_TRAIN`` samples, trains a Keras CNN on it for two
    epochs with clip values ``(0, 1)`` and wraps it in an ``ActivationDefence``.
    """
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train = x_train[:NB_TRAIN]
    y_train = y_train[:NB_TRAIN]
    cls.mnist = (x_train, y_train), (x_test, y_test)

    k.set_learning_phase(1)

    # Layers are created in network order and then appended one by one.
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    cls.classifier = KerasClassifier((0, 1), model=model)
    cls.classifier.fit(x_train, y_train, nb_epochs=2, batch_size=128)

    cls.defence = ActivationDefence(cls.classifier, x_train, y_train)
def setUpClass(cls):
    """Create the shared classifier, MNIST fixture and defence for the class.

    The classifier comes from ``get_classifier_kr``; the second value it
    returns is not used here. ``cls.mnist`` holds ``(train, test)`` with the
    training split truncated to ``NB_TRAIN`` samples.
    """
    # Build KerasClassifier
    cls.classifier, _ = get_classifier_kr()

    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train = x_train[:NB_TRAIN]
    y_train = y_train[:NB_TRAIN]
    cls.mnist = (x_train, y_train), (x_test, y_test)

    cls.defence = ActivationDefence(cls.classifier, x_train, y_train)
def main():
    """Train or reload a backdoored MNIST classifier, then detect and repair the poison.

    The function first tries to reuse the artefacts of a previous run
    (``my_poison_classifier.p``, ``data_training.npz``, ``data_testing.npz``).
    If any of them cannot be loaded it poisons MNIST with ``generate_backdoor``,
    trains a Keras CNN for 50 epochs and writes those artefacts, plus
    ``anu_poison_mnist.hdf5`` and ``anu_poison_mnist.json``, to the working
    directory.

    It then prints the classifier's accuracy on the whole, the poisoned and
    the clean part of the test set, runs ``ActivationDefence.detect_poison``,
    relabels the poison through ``relabel_poison_ground_truth`` and prints the
    accuracy on the poisoned test samples before and after that repair.
    """
    import pickle

    try:
        print('See if poison model has been previously trained ')
        # NOTE(security): unpickling can execute arbitrary code. This file must
        # only ever be the cache written by an earlier run of this script.
        with open('my_poison_classifier.p', 'rb') as classifier_file:
            classifier = pickle.load(classifier_file)
        print('Loaded model from pickle.... ')

        # The .npz archives are closed as soon as the arrays have been read.
        with np.load('data_training.npz') as data_train:
            x_train = data_train['x_train']
            y_train = data_train['y_train']
            is_poison_train = data_train['is_poison_train']

        with np.load('data_testing.npz') as data_test:
            x_test = data_test['x_test']
            y_test = data_test['y_test']
            is_poison_test = data_test['is_poison_test']
    except Exception as load_error:
        # The cache is best-effort: any ordinary failure means "train again".
        # KeyboardInterrupt/SystemExit are deliberately NOT caught, so Ctrl-C
        # during loading aborts instead of starting a 50-epoch training run.
        print('Could not reuse cached model/data (%r); training from scratch.' % (load_error,))

        # Read MNIST dataset (x_raw contains the original images):
        (x_raw, y_raw), (x_raw_test, y_raw_test), min_, max_ = load_mnist(raw=True)

        n_train = np.shape(x_raw)[0]
        num_selection = n_train
        random_selection_indices = np.random.choice(n_train, num_selection)
        x_raw = x_raw[random_selection_indices]
        y_raw = y_raw[random_selection_indices]

        # Poison training data
        perc_poison = .33
        (is_poison_train, x_poisoned_raw, y_poisoned_raw) = generate_backdoor(x_raw, y_raw, perc_poison)
        x_train, y_train = preprocess(x_poisoned_raw, y_poisoned_raw)
        # Add channel axis:
        x_train = np.expand_dims(x_train, axis=3)

        # Poison test data
        (is_poison_test, x_poisoned_raw_test, y_poisoned_raw_test) = generate_backdoor(
            x_raw_test, y_raw_test, perc_poison)
        x_test, y_test = preprocess(x_poisoned_raw_test, y_poisoned_raw_test)
        # Add channel axis:
        x_test = np.expand_dims(x_test, axis=3)

        # Shuffle training data so poison is not together
        n_train = np.shape(y_train)[0]
        shuffled_indices = np.arange(n_train)
        np.random.shuffle(shuffled_indices)
        x_train = x_train[shuffled_indices]
        y_train = y_train[shuffled_indices]
        is_poison_train = is_poison_train[shuffled_indices]

        # Save data used for training and testing split:
        np.savez('data_training.npz', x_train=x_train, y_train=y_train,
                 is_poison_train=is_poison_train, x_raw=x_poisoned_raw)
        np.savez('data_testing.npz', x_test=x_test, y_test=y_test,
                 is_poison_test=is_poison_test, x_raw_test=x_poisoned_raw_test)

        # Create Keras convolutional neural network - basic architecture from Keras examples
        # Source here: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
        k.set_learning_phase(1)
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(10, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        classifier = KerasClassifier((min_, max_), model=model)
        classifier.fit(x_train, y_train, nb_epochs=50, batch_size=128)

        print('Saving poisoned model: ')
        # ``with`` guarantees the pickle is flushed and the handle is closed.
        with open('my_poison_classifier.p', 'wb') as classifier_file:
            pickle.dump(classifier, classifier_file)

        # Also saving for Anu:
        file_name = 'anu_poison_mnist'
        model.save(file_name + '.hdf5')
        model_json = model.to_json()
        with open(file_name + '.json', "w") as json_file:
            json_file.write(model_json)

    # Evaluate the classifier on the test set
    preds = np.argmax(classifier.predict(x_test), axis=1)
    acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
    print("\nTest accuracy: %.2f%%" % (acc * 100))

    # Evaluate the classifier on poisonous data
    preds = np.argmax(classifier.predict(x_test[is_poison_test]), axis=1)
    acc_poison_before = np.sum(
        preds == np.argmax(y_test[is_poison_test], axis=1)) / y_test[is_poison_test].shape[0]
    print("\nPoisonous test set accuracy (i.e. effectiveness of poison): %.2f%%" % (acc_poison_before * 100))

    # Evaluate the classifier on clean data
    preds = np.argmax(classifier.predict(x_test[is_poison_test == 0]), axis=1)
    acc_clean = np.sum(
        preds == np.argmax(y_test[is_poison_test == 0], axis=1)) / y_test[is_poison_test == 0].shape[0]
    print("\nClean test set accuracy: %.2f%%" % (acc_clean * 100))

    # Calling poisoning defence:
    defence = ActivationDefence(classifier, x_train, y_train)

    # End-to-end method:
    print("------------------- Results using size metric -------------------")
    print(defence.get_params())
    defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA")

    # Now fix the model
    x_new, y_fix = correct_poisoned_labels(x_train, y_train, is_poison_train)
    improvement = defence.relabel_poison_ground_truth(x_new, y_fix, test_set_split=0.7, tolerable_backdoor=0.001,
                                                      max_epochs=5, batch_epochs=10)

    # Evaluate the classifier on poisonous data after backdoor fix:
    preds = np.argmax(classifier.predict(x_test[is_poison_test]), axis=1)
    acc_after = np.sum(preds == np.argmax(y_test[is_poison_test], axis=1)) / y_test[is_poison_test].shape[0]
    print("\nPoisonous test set accuracy (i.e. effectiveness of poison) after backdoor fix: %.2f%%" % (acc_after * 100))

    print("\n Improvement after training: ", improvement)
    # "before" must be the poisoned-subset accuracy measured prior to the fix,
    # i.e. the same metric as ``acc_after``, not the clean-set accuracy that
    # was computed last.
    print('before: ', acc_poison_before, ' after: ', acc_after)

    print("done :) ")
class TestActivationDefence(unittest.TestCase):
    """Unit tests for ActivationDefence on a small Keras CNN trained on MNIST.

    Run with: python -m unittest discover art/ -p 'activation_defence_unittest.py'
    """

    def setUp(self):
        """Train a fresh classifier on 300 MNIST samples and wrap it in a defence."""
        (all_images, all_labels), (_, _), min_, max_ = load_dataset(str('mnist'))
        self.x_train = all_images[:300]
        self.y_train = all_labels[:300]

        k.set_learning_phase(1)

        # Layers are created in network order and then appended one by one.
        layers = [
            Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=self.x_train.shape[1:]),
            Conv2D(64, (3, 3), activation='relu'),
            MaxPooling2D(pool_size=(2, 2)),
            Dropout(0.25),
            Flatten(),
            Dense(128, activation='relu'),
            Dropout(0.5),
            Dense(10, activation='softmax'),
        ]
        model = Sequential()
        for layer in layers:
            model.add(layer)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        self.classifier = KerasClassifier((min_, max_), model=model)
        self.classifier.fit(self.x_train, self.y_train, nb_epochs=1, batch_size=128)

        self.defence = ActivationDefence(self.classifier, self.x_train, self.y_train)

    # The four tests below hand set_params a value the test expects to be
    # rejected; they are marked as expected failures rather than asserting on
    # a specific exception type.
    @unittest.expectedFailure
    def test_wrong_parameters_1(self):
        self.defence.set_params(n_clusters=0)

    @unittest.expectedFailure
    def test_wrong_parameters_2(self):
        self.defence.set_params(clustering_method='what')

    @unittest.expectedFailure
    def test_wrong_parameters_3(self):
        self.defence.set_params(reduce='what')

    @unittest.expectedFailure
    def test_wrong_parameters_4(self):
        self.defence.set_params(cluster_analysis='what')

    def test_activations(self):
        # One activation entry is expected per training sample.
        activations = self.defence._get_activations()
        self.assertEqual(len(self.x_train), len(activations))

    def test_output_clusters(self):
        n_classes = self.classifier.nb_classes
        for n_clusters in range(2, 5):
            clusters_by_class, _ = self.defence.cluster_activations(n_clusters=n_clusters)

            # Verify expected number of classes
            self.assertEqual(np.shape(clusters_by_class)[0], n_classes)

            # Check we get the expected number of clusters:
            self.assertEqual(len(np.unique(clusters_by_class[0])), n_clusters)

            # Check right amount of data
            n_dp = sum(len(clusters_by_class[label]) for label in range(n_classes))
            self.assertEqual(len(self.x_train), n_dp)

    def test_detect_poison(self):
        n_samples = len(self.x_train)

        confidence_level, is_clean_lst = self.defence.detect_poison(
            n_clusters=2, ndims=10, reduce='PCA')
        sum_clean1 = sum(is_clean_lst)

        # Check number of items in is_clean
        self.assertEqual(n_samples, len(is_clean_lst))
        self.assertEqual(n_samples, len(confidence_level))

        # Test right number of clusters
        self.assertEqual(len(np.unique(self.defence.clusters_by_class[0])), 2)

        confidence_level, is_clean_lst = self.defence.detect_poison(
            n_clusters=3, ndims=10, reduce='PCA', cluster_analysis='distance')
        self.assertEqual(n_samples, len(is_clean_lst))
        self.assertEqual(n_samples, len(confidence_level))

        # Test change of state to new number of clusters:
        self.assertEqual(len(np.unique(self.defence.clusters_by_class[0])), 3)

        # Test clean data has changed
        sum_clean2 = sum(is_clean_lst)
        self.assertNotEqual(sum_clean1, sum_clean2)

        # The two analysis modes are expected to flag different samples.
        _, is_clean_lst = self.defence.detect_poison(
            n_clusters=2, ndims=10, reduce='PCA', cluster_analysis='distance')
        sum_dist = sum(is_clean_lst)

        _, is_clean_lst = self.defence.detect_poison(
            n_clusters=2, ndims=10, reduce='PCA', cluster_analysis='smaller')
        sum_size = sum(is_clean_lst)

        self.assertNotEqual(sum_dist, sum_size)

    def test_analyze_cluster(self):
        n_classes = self.classifier.nb_classes
        class_labels = range(n_classes)

        dist_clean_by_class = self.defence.analyze_clusters(cluster_analysis='distance')
        self.assertEqual(n_classes, len(dist_clean_by_class))

        # Check right amount of data
        n_dp = sum(len(dist_clean_by_class[label]) for label in class_labels)
        self.assertEqual(len(self.x_train), n_dp)

        sz_clean_by_class = self.defence.analyze_clusters(cluster_analysis='smaller')
        self.assertEqual(n_classes, len(sz_clean_by_class))

        # Check right amount of data
        n_dp = sum(len(sz_clean_by_class[label]) for label in class_labels)
        sum_sz = sum(sum(sz_clean_by_class[label]) for label in class_labels)
        sum_dis = sum(sum(dist_clean_by_class[label]) for label in class_labels)
        self.assertEqual(len(self.x_train), n_dp)

        # Very unlikely that they are the same
        self.assertNotEqual(
            sum_dis, sum_sz,
            msg='This is very unlikely to happen... there may be an error')


if __name__ == '__main__':
    unittest.main()
def main():
    """Demonstrate ActivationDefence on a backdoored MNIST classifier.

    Poisons a random 5000-sample selection of MNIST with ``generate_backdoor``,
    trains a Keras CNN on it for 30 epochs, prints its accuracy on the whole,
    poisoned and clean parts of the test set, and then runs the defence with
    the size-based and the distance-based cluster analysis, printing the
    evaluation returned by ``evaluate_defence`` for each.
    """
    # Read MNIST dataset (x_raw contains the original images):
    (x_raw, y_raw), (x_raw_test, y_raw_test), min_, max_ = load_mnist(raw=True)

    n_train = np.shape(x_raw)[0]
    num_selection = 5000
    picked = np.random.choice(n_train, num_selection)
    x_raw, y_raw = x_raw[picked], y_raw[picked]

    # Poison training data
    perc_poison = 0.33
    is_poison_train, x_poisoned_raw, y_poisoned_raw = generate_backdoor(x_raw, y_raw, perc_poison)
    x_train, y_train = preprocess(x_poisoned_raw, y_poisoned_raw)
    # Add channel axis:
    x_train = np.expand_dims(x_train, axis=3)

    # Poison test data
    is_poison_test, x_poisoned_raw_test, y_poisoned_raw_test = generate_backdoor(
        x_raw_test, y_raw_test, perc_poison)
    x_test, y_test = preprocess(x_poisoned_raw_test, y_poisoned_raw_test)
    # Add channel axis:
    x_test = np.expand_dims(x_test, axis=3)

    # Shuffle training data so poison is not together
    order = np.arange(np.shape(y_train)[0])
    np.random.shuffle(order)
    x_train = x_train[order]
    y_train = y_train[order]
    is_poison_train = is_poison_train[order]

    # Create Keras convolutional neural network - basic architecture from Keras examples
    # Source here: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=x_train.shape[1:]),
        Conv2D(64, (3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.5),
        Dense(10, activation="softmax"),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    classifier = KerasClassifier(model=model, clip_values=(min_, max_))
    classifier.fit(x_train, y_train, nb_epochs=30, batch_size=128)

    def accuracy(samples, one_hot_labels):
        # Share of samples whose arg-max prediction matches the arg-max label.
        predicted = np.argmax(classifier.predict(samples), axis=1)
        return np.sum(predicted == np.argmax(one_hot_labels, axis=1)) / one_hot_labels.shape[0]

    def report(matrix_json):
        # evaluate_defence's result is parsed as JSON and printed label by label.
        parsed = json.loads(matrix_json)
        for label in parsed:
            print(label)
            pprint.pprint(parsed[label])

    # Evaluate the classifier on the test set
    acc = accuracy(x_test, y_test)
    print("\nTest accuracy: %.2f%%" % (acc * 100))

    # Evaluate the classifier on poisonous data
    acc = accuracy(x_test[is_poison_test], y_test[is_poison_test])
    print("\nPoisonous test set accuracy (i.e. effectiveness of poison): %.2f%%" % (acc * 100))

    # Evaluate the classifier on clean data
    clean_test = is_poison_test == 0
    acc = accuracy(x_test[clean_test], y_test[clean_test])
    print("\nClean test set accuracy: %.2f%%" % (acc * 100))

    # Calling poisoning defence:
    defence = ActivationDefence(classifier, x_train, y_train)

    # End-to-end method:
    print("------------------- Results using size metric -------------------")
    print(defence.get_params())
    defence.detect_poison(nb_clusters=2, nb_dims=10, reduce="PCA")

    # Evaluate method when ground truth is known:
    is_clean = is_poison_train == 0
    confusion_matrix = defence.evaluate_defence(is_clean)
    print("Evaluation defence results for size-based metric: ")
    report(confusion_matrix)

    # Visualize clusters:
    print("Visualize clusters")
    sprites_by_class = defence.visualize_clusters(x_train, "mnist_poison_demo")

    # Show plots for clusters of class 5
    n_class = 5
    try:
        import matplotlib.pyplot as plt

        for cluster_id in (0, 1):
            plt.imshow(sprites_by_class[n_class][cluster_id])
            plt.title("Class " + str(n_class) + " cluster: " + str(cluster_id))
            plt.show()
    except ImportError:
        print("matplotlib not installed. For this reason, cluster visualization was not displayed")

    # Try again using distance analysis this time:
    print("------------------- Results using distance metric -------------------")
    print(defence.get_params())
    defence.detect_poison(nb_clusters=2, nb_dims=10, reduce="PCA", cluster_analysis="distance")
    confusion_matrix = defence.evaluate_defence(is_clean)
    print("Evaluation defence results for distance-based metric: ")
    report(confusion_matrix)

    # Other ways to invoke the defence:
    clustering_options = {"nb_clusters": 2, "nb_dims": 10, "reduce": "PCA"}
    defence.cluster_activations(**clustering_options)

    for analysis in ("distance", "smaller"):
        analysis_options = {"cluster_analysis": analysis}
        defence.analyze_clusters(**analysis_options)
        defence.evaluate_defence(is_clean)

    print("done :) ")
def main():
    """Demonstrate ActivationDefence on a backdoored MNIST classifier.

    Poisons a random 5000-sample selection of MNIST with ``generate_backdoor``,
    trains a Keras CNN on it for 30 epochs, prints its accuracy on the whole,
    poisoned and clean parts of the test set, and then runs the defence with
    the size-based and the distance-based cluster analysis, pretty-printing
    the result of ``evaluate_defence`` for each.
    """
    # Read MNIST dataset (x_raw contains the original images):
    (x_raw, y_raw), (x_raw_test, y_raw_test), min_, max_ = load_mnist(raw=True)

    n_train = np.shape(x_raw)[0]
    num_selection = 5000
    picked = np.random.choice(n_train, num_selection)
    x_raw, y_raw = x_raw[picked], y_raw[picked]

    # Poison training data
    perc_poison = .33
    is_poison_train, x_poisoned_raw, y_poisoned_raw = generate_backdoor(x_raw, y_raw, perc_poison)
    x_train, y_train = preprocess(x_poisoned_raw, y_poisoned_raw)
    # Add channel axis:
    x_train = np.expand_dims(x_train, axis=3)

    # Poison test data
    is_poison_test, x_poisoned_raw_test, y_poisoned_raw_test = generate_backdoor(
        x_raw_test, y_raw_test, perc_poison)
    x_test, y_test = preprocess(x_poisoned_raw_test, y_poisoned_raw_test)
    # Add channel axis:
    x_test = np.expand_dims(x_test, axis=3)

    # Shuffle training data so poison is not together
    order = np.arange(np.shape(y_train)[0])
    np.random.shuffle(order)
    x_train = x_train[order]
    y_train = y_train[order]
    is_poison_train = is_poison_train[order]

    # Create Keras convolutional neural network - basic architecture from Keras examples
    # Source here: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
    k.set_learning_phase(1)
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    classifier = KerasClassifier((min_, max_), model=model)
    classifier.fit(x_train, y_train, nb_epochs=30, batch_size=128)

    def accuracy(samples, one_hot_labels):
        # Share of samples whose arg-max prediction matches the arg-max label.
        predicted = np.argmax(classifier.predict(samples), axis=1)
        return np.sum(predicted == np.argmax(one_hot_labels, axis=1)) / one_hot_labels.shape[0]

    # Evaluate the classifier on the test set
    acc = accuracy(x_test, y_test)
    print("\nTest accuracy: %.2f%%" % (acc * 100))

    # Evaluate the classifier on poisonous data
    acc = accuracy(x_test[is_poison_test], y_test[is_poison_test])
    print("\nPoisonous test set accuracy (i.e. effectiveness of poison): %.2f%%" % (acc * 100))

    # Evaluate the classifier on clean data
    clean_test = is_poison_test == 0
    acc = accuracy(x_test[clean_test], y_test[clean_test])
    print("\nClean test set accuracy: %.2f%%" % (acc * 100))

    # Calling poisoning defence:
    defence = ActivationDefence(classifier, x_train, y_train, verbose=True)

    # End-to-end method:
    print("------------------- Results using size metric -------------------")
    print(defence.get_params())
    defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA")

    # Evaluate method when ground truth is known:
    is_clean = (is_poison_train == 0)
    confusion_matrix = defence.evaluate_defence(is_clean)
    print("Evaluation defence results for size-based metric: ")
    pprint.pprint(confusion_matrix)

    # Visualize clusters:
    print("Visualize clusters")
    defence.visualize_clusters(x_train, 'mnist_poison_demo')

    # Try again using distance analysis this time:
    print("------------------- Results using distance metric -------------------")
    print(defence.get_params())
    defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA", cluster_analysis='distance')
    confusion_matrix = defence.evaluate_defence(is_clean)
    print("Evaluation defence results for distance-based metric: ")
    pprint.pprint(confusion_matrix)

    # Other ways to invoke the defence:
    defence.cluster_activations(n_clusters=2, ndims=10, reduce='PCA')

    for analysis in ('distance', 'smaller'):
        defence.analyze_clusters(cluster_analysis=analysis)
        defence.evaluate_defence(is_clean)

    print("done :) ")