def test_with_preprocessing(self):
    """FGSM should still perturb inputs and flip predictions on a CNN defended with feature squeezing."""
    session = tf.Session()
    k.set_session(session)

    # Small MNIST subsets keep the test fast.
    batch_size, n_train, n_test = 100, 1000, 100
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:n_train], y_train[:n_train]
    x_test, y_test = x_test[:n_test], y_test[:n_test]

    # Build and briefly train the defended classifier.
    classifier = CNN(x_train[0].shape, act="relu", defences=["featsqueeze1"])
    classifier.compile({"loss": 'categorical_crossentropy', "optimizer": 'adam', "metrics": ['accuracy']})
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)

    scores = classifier.evaluate(x_train, y_train)
    print("\naccuracy on training set: %.2f%%" % (scores[1] * 100))
    scores = classifier.evaluate(x_test, y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Craft adversarial samples with FGSM.
    attack = FastGradientMethod(classifier, session)
    fgsm_params = {"verbose": 0, "clip_min": 0., "clip_max": 1., "eps": 1.}
    x_train_adv = attack.generate(x_train, **fgsm_params)
    x_test_adv = attack.generate(x_test, **fgsm_params)

    # The attack must actually modify the inputs...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and change at least some of the predicted labels.
    preds_train = get_labels_np_array(classifier.predict(x_train_adv))
    preds_test = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == preds_train).all())
    self.assertFalse((y_test == preds_test).all())

    scores = classifier.evaluate(x_train_adv, y_train)
    print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))
    scores = classifier.evaluate(x_test_adv, y_test)
    print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
def test_mnist_untargeted(self):
    """Untargeted JSMA on a small MNIST CNN must perturb inputs and flip some predictions."""
    session = tf.Session()
    k.set_session(session)

    # Restrict MNIST to a tiny subset so the attack stays cheap.
    batch_size, n_train, n_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:n_train], y_train[:n_train]
    x_test, y_test = x_test[:n_test], y_test[:n_test]

    # Train a plain (undefended) classifier for one epoch.
    classifier = CNN(x_train[0].shape, act="relu")
    classifier.compile({"loss": 'categorical_crossentropy', "optimizer": 'adam', "metrics": ['accuracy']})
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)

    scores = classifier.evaluate(x_test, y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Run the saliency map attack without a target class.
    jsma = SaliencyMapMethod(classifier, sess=session)
    jsma.set_params(clip_min=0, clip_max=1, theta=1)
    x_test_adv = jsma.generate(x_test)

    # Inputs were modified and at least one label changed.
    self.assertFalse((x_test == x_test_adv).all())
    preds = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == preds).all())

    scores = classifier.evaluate(x_test_adv, y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def test_mnist(self):
    """Universal perturbation (via DeepFool) should fool the classifier or report non-convergence."""
    session = tf.Session()
    k.set_session(session)

    # Tiny MNIST subset: the universal attack is expensive.
    batch_size, n_train, n_test = 10, 10, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:n_train], y_train[:n_train]
    x_test, y_test = x_test[:n_test], y_test[:n_test]

    # Train a small CNN for one epoch.
    classifier = CNN(x_train[0].shape, act="relu")
    classifier.compile({"loss": 'categorical_crossentropy', "optimizer": 'adam', "metrics": ['accuracy']})
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)

    scores = classifier.evaluate(x_test, y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Craft a single universal perturbation from the training subset.
    attack = UniversalPerturbation(classifier, session)
    up_params = {"verbose": 2, "clip_min": 0., "clip_max": 1, "attacker": "deepfool"}
    x_train_adv = attack.generate(x_train, **up_params)

    # Either the fooling rate is reasonable, or the attack reports it never converged.
    self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

    # Apply the same perturbation vector to the held-out test set.
    x_test_adv = x_test + attack.v
    self.assertFalse((x_test == x_test_adv).all())

    # Some predictions must differ on both splits.
    preds_train = get_labels_np_array(classifier.predict(x_train_adv))
    preds_test = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == preds_test).all())
    self.assertFalse((y_train == preds_train).all())

    scores = classifier.evaluate(x_train_adv, y_train)
    print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))
    scores = classifier.evaluate(x_test_adv, y_test)
    print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
def test_mnist(self):
    """Carlini-Wagner L2 (untargeted setup with random y_val) should perturb inputs and flip labels."""
    session = tf.Session()
    k.set_session(session)

    # Keep MNIST tiny — C&W is iterative and slow.
    batch_size, n_train, n_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:n_train], y_train[:n_train]
    x_test, y_test = x_test[:n_test], y_test[:n_test]

    # One-epoch CNN is enough for the attack sanity check.
    classifier = CNN(x_train[0].shape, act="relu")
    classifier.compile({"loss": 'categorical_crossentropy', "optimizer": 'adam', "metrics": ['accuracy']})
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)

    scores = classifier.evaluate(x_test, y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Configure the attack with a small iteration budget.
    cw = CarliniL2Method(classifier, sess=session, targeted=False, max_iterations=100,
                         binary_search_steps=2, learning_rate=1e-2, initial_const=1)

    # Draw random target labels distinct from the true ones.
    nb_classes = classifier.model.get_output_shape_at(-1)[-1]
    x_test_adv = cw.generate(x_test, **{'y_val': random_targets(y_test, nb_classes)})

    # Inputs were modified and at least one prediction changed.
    self.assertFalse((x_test == x_test_adv).all())
    preds = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == preds).all())

    scores = classifier.evaluate(x_test_adv, y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def fit(self, x_val, y_val, **kwargs):
    """
    Train a model adversarially. Each attack specified when creating the AdversarialTrainer is applied to all
    samples in the dataset, and only the successful ones (on the source model) are kept for data augmentation.

    :param x_val: Training set
    :type x_val: `np.ndarray`
    :param y_val: Labels
    :type y_val: `np.ndarray`
    :param kwargs: Dictionary of parameters to be passed on to the `fit` method of the classifier
    :type kwargs: `dict`
    :return: `None`
    """
    x_augmented = list(x_val.copy())
    y_augmented = list(y_val.copy())

    # True class indices, computed once instead of twice per attack.
    true_labels = np.argmax(y_val, axis=1)

    # Generate adversarial samples for each attack; `self.attacks` maps each attack to its parameter dict.
    for attack in self.attacks:
        # Fit the classifier used by the attack when it is not already fitted. Classifiers without an
        # `is_fitted` flag are always (re)fitted, matching the previous hasattr/else behaviour.
        if not getattr(attack.classifier, 'is_fitted', False):
            attack.classifier.fit(x_val, y_val, **kwargs)

        # Craft adversarial samples and predict their labels on the source model.
        x_adv = attack.generate(x_val, **self.attacks[attack])
        y_pred = get_labels_np_array(attack.classifier.predict(x_adv))

        # A sample counts as a successful attack when the predicted class differs from the true class.
        success = np.argmax(y_pred, axis=1) != true_labels

        # Only add successful attacks to the augmented dataset.
        x_augmented.extend(list(x_adv[success]))
        # NOTE(review): the augmentation keeps the *predicted* (incorrect) labels for the adversarial
        # samples rather than the true labels from y_val — behaviour preserved as-is, but confirm this
        # is the intended training signal.
        y_augmented.extend(list(y_pred[success]))

    # Fit the model with the extended dataset and keep the augmented data for inspection.
    self.classifier.fit(np.array(x_augmented), np.array(y_augmented), **kwargs)
    self.x = x_augmented
    self.y = y_augmented
def test_mnist_targeted(self):
    """Targeted JSMA with random (non-true) target classes should perturb inputs and flip labels."""
    session = tf.Session()
    k.set_session(session)

    # Small MNIST subsets keep the test cheap.
    batch_size, n_train, n_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:n_train], y_train[:n_train]
    x_test, y_test = x_test[:n_test], y_test[:n_test]

    # Train a small CNN for a single epoch.
    classifier = CNN(x_train[0].shape, act="relu")
    classifier.compile({"loss": 'categorical_crossentropy', "optimizer": 'adam', "metrics": ['accuracy']})
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)

    scores = classifier.evaluate(x_test, y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Draw random targets, resampling until no target equals the true class.
    import numpy as np
    labels = np.argmax(y_test, axis=1)
    nb_classes = np.unique(labels).shape[0]
    targets = np.random.randint(nb_classes, size=n_test)
    while (targets == labels).any():
        targets = np.random.randint(nb_classes, size=n_test)

    # Run the targeted saliency map attack.
    jsma = SaliencyMapMethod(classifier, sess=session, clip_min=0, clip_max=1, theta=1)
    x_test_adv = jsma.generate(x_test, y_val=targets)

    # Inputs were modified and at least one prediction changed.
    self.assertFalse((x_test == x_test_adv).all())
    preds = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == preds).all())

    scores = classifier.evaluate(x_test_adv, y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))