def setUp(self):
    super(TestFastFeatureAdversaries, self).setUp()
    import tensorflow as tf

    def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
        """
        Similar CNN to AlexNet.
        """
        import cleverhans_tutorials.tutorial_models as t_models
        layers = [t_models.Conv2D(96, (3, 3), (2, 2), "VALID"),
                  t_models.ReLU(),
                  t_models.Conv2D(256, (3, 3), (2, 2), "VALID"),
                  t_models.ReLU(),
                  t_models.Conv2D(384, (3, 3), (2, 2), "VALID"),
                  t_models.ReLU(),
                  t_models.Conv2D(384, (3, 3), (2, 2), "VALID"),
                  t_models.ReLU(),
                  t_models.Conv2D(256, (3, 3), (2, 2), "VALID"),
                  t_models.ReLU(),
                  t_models.Flatten(),
                  t_models.Linear(4096),
                  t_models.ReLU(),
                  t_models.Linear(4096),
                  t_models.ReLU(),
                  t_models.Linear(1000),
                  t_models.Softmax()]
        # Expose the post-ReLU activations of the second 4096-unit fully
        # connected block under the name 'fc7' so the attack can target them.
        layers[-3].name = 'fc7'
        model = t_models.MLP(layers, input_shape)
        return model

    self.input_shape = [10, 224, 224, 3]
    self.sess = tf.Session()
    self.model = make_imagenet_cnn(self.input_shape)
    self.attack = FastFeatureAdversaries(self.model)
def main(argv):
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    input_shape = [FLAGS.batch_size, 224, 224, 3]
    x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
    x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

    print("Input shape:")
    print(input_shape)

    model = make_imagenet_cnn(input_shape)

    print("Model:")
    for i, layer in enumerate(model.layers):
        print('%s %s' % (model.layer_names[i], layer.output_shape))

    attack = FastFeatureAdversaries(model)
    attack_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                     'nb_iter': FLAGS.nb_iter, 'eps_iter': 0.01,
                     'layer': FLAGS.layer}
    x_adv = attack.generate(x_src, x_guide, **attack_params)

    h_adv = model.fprop(x_adv)[FLAGS.layer]
    h_src = model.fprop(x_src)[FLAGS.layer]
    h_guide = model.fprop(x_guide)[FLAGS.layer]

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        ha, hs, hg, xa, xs, xg = sess.run(
            [h_adv, h_src, h_guide, x_adv, x_src, x_guide])

        # Note: the quantities printed below are squared L2 distances between
        # the feature representations at the chosen layer.
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (FLAGS.layer, ((hs - ha) * (hs - ha)).sum()))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (FLAGS.layer, ((hg - ha) * (hg - ha)).sum()))
        print("L2 distance between source and guide `%s`: %.4f" %
              (FLAGS.layer, ((hg - hs) * (hg - hs)).sum()))
        print("Maximum perturbation: %.4f" % np.abs((xa - xs)).max())
        print("Original features: ")
        print(hs[:10, :10])
        print("Adversarial features: ")
        print(ha[:10, :10])
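# A minimal sketch of the module-level scaffolding the `main(argv)` example
# above assumes. The flag names match the FLAGS.* references in the code, but
# the default values and the location of `make_imagenet_cnn` (defined elsewhere
# in the same script) are assumptions, not part of the original snippet.
import numpy as np
import tensorflow as tf

from cleverhans.attacks import FastFeatureAdversaries

FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_integer('batch_size', 10,
                        'Batch size of the source/guide images (assumed default)')
tf.flags.DEFINE_integer('nb_iter', 100,
                        'Number of attack iterations (assumed default)')
tf.flags.DEFINE_string('layer', 'fc7',
                       'Name of the layer whose features are matched (assumed default)')

if __name__ == '__main__':
    tf.app.run(main)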
def setUp(self):
    super(TestFastFeatureAdversaries, self).setUp()

    def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
        """
        Similar CNN to AlexNet.
        """
        class ModelImageNetCNN(Model):
            def __init__(self, scope, nb_classes=1000, **kwargs):
                del kwargs
                Model.__init__(self, scope, nb_classes, locals())

            def fprop(self, x, **kwargs):
                del kwargs
                my_conv = functools.partial(
                    tf.layers.conv2d, kernel_size=3, strides=2,
                    padding='valid', activation=tf.nn.relu,
                    kernel_initializer=HeReLuNormalInitializer)
                my_dense = functools.partial(
                    tf.layers.dense,
                    kernel_initializer=HeReLuNormalInitializer)
                with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                    for depth in [96, 256, 384, 384, 256]:
                        x = my_conv(x, depth)
                    y = tf.layers.flatten(x)
                    y = my_dense(y, 4096, tf.nn.relu)
                    y = fc7 = my_dense(y, 4096, tf.nn.relu)
                    y = my_dense(y, 1000)
                    return {'fc7': fc7,
                            self.O_LOGITS: y,
                            self.O_PROBS: tf.nn.softmax(logits=y)}

        return ModelImageNetCNN('imagenet')

    self.input_shape = [10, 224, 224, 3]
    self.sess = tf.Session()
    self.model = make_imagenet_cnn(self.input_shape)
    self.attack = FastFeatureAdversaries(self.model, sess=self.sess)
def setUp(self):
    super(TestFastFeatureAdversaries, self).setUp()

    def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
        """
        Similar CNN to AlexNet.
        """
        class ModelImageNetCNN(Model):
            def __init__(self, scope, nb_classes=1000, **kwargs):
                del kwargs
                Model.__init__(self, scope, nb_classes, locals())

            def fprop(self, x, **kwargs):
                del kwargs
                my_conv = functools.partial(
                    tf.layers.conv2d, kernel_size=3, strides=2,
                    padding='valid', activation=tf.nn.relu,
                    kernel_initializer=HeReLuNormalInitializer)
                my_dense = functools.partial(
                    tf.layers.dense,
                    kernel_initializer=HeReLuNormalInitializer)
                with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                    for depth in [96, 256, 384, 384, 256]:
                        x = my_conv(x, depth)
                    y = tf.layers.flatten(x)
                    y = my_dense(y, 4096, tf.nn.relu)
                    y = fc7 = my_dense(y, 4096, tf.nn.relu)
                    y = my_dense(y, 1000)
                    return {'fc7': fc7,
                            self.O_LOGITS: y,
                            self.O_PROBS: tf.nn.softmax(logits=y)}

        return ModelImageNetCNN('imagenet')

    self.input_shape = [10, 224, 224, 3]
    self.sess = tf.Session()
    self.model = make_imagenet_cnn(self.input_shape)
    self.attack = FastFeatureAdversaries(self.model)
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):
    size = 256
    eval_params = {'batch_size': 128}

    ############################## Prepare the Data ##############################
    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)
    ############################## Prepare the Data ##############################

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []

        def modelBuilder(x, num_classes, dataset, type, sess, input_output):
            if len(input_output) == 0:
                reuse = False
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type),
                        reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type),
                        reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            else:
                reuse = True
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(num_classes, inputT=x, reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)

            return tf_model.logits

        # create an attackable model for cleverhans
        model = CallableModelWrapper(
            lambda placeholder: modelBuilder(placeholder, num_classes, dataset,
                                             type, sess, input_output),
            'logits')

        # TODO: check the configurations
        if attack_type == "FGM":  # pass
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "CWL2":  # pass
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {'confidence': 0.9,
                      'batch_size': 128,
                      'learning_rate': 0.005}
        elif attack_type == "DF":  # pass
            attack = DeepFool(model, back='tf', sess=sess)
            params = {}
        elif attack_type == "ENM":  # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {'confidence': 0.9,
                      'batch_size': 128,
                      'learning_rate': 0.005}
        elif attack_type == "FFA":  # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'eps_iter': 0.005,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06,
                      'clip_min': 0.,
                      'clip_max': 1.}
        else:
            raise Exception("Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # "adv_vals" may contain NaNs when the attack fails, and an input may
        # be left completely unperturbed for the same reason
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            if np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleaning
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(
            len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx]))
                     for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              x_test[:size], y_test[:size], args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              adv_vals_cleaned, y_cleaned, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
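# A minimal sketch of how the evaluation function above might be wired up.
# The cleverhans import paths below follow the v2/v3 layout; prepare_CIFAR10,
# prepare_CIFAR100, prepare_SVHN, prepare_Resnet, and prepare_GBP_End2End are
# project-local helpers whose modules are not shown in the snippet, so their
# import locations are omitted here and remain assumptions.
import keras
import numpy as np
import tensorflow as tf

from cleverhans.attacks import (FastGradientMethod, CarliniWagnerL2, DeepFool,
                                ElasticNetMethod, FastFeatureAdversaries, LBFGS,
                                MadryEtAl, MomentumIterativeMethod,
                                SaliencyMapMethod, SPSA,
                                VirtualAdversarialMethod, vatm)
from cleverhans.model import CallableModelWrapper
from cleverhans.utils_tf import model_eval

if __name__ == '__main__':
    # Evaluate the Resnet classifier on CIFAR10 against a single attack type.
    main(type="Resnet", dataset="CIFAR10", attack_type="FGM")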
class TestFastFeatureAdversaries(CleverHansTest):
    def setUp(self):
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Similar CNN to AlexNet.
            """
            class ModelImageNetCNN(Model):
                def __init__(self, scope, nb_classes=1000, **kwargs):
                    del kwargs
                    Model.__init__(self, scope, nb_classes, locals())

                def fprop(self, x, **kwargs):
                    del kwargs
                    my_conv = functools.partial(
                        tf.layers.conv2d, kernel_size=3, strides=2,
                        padding='valid', activation=tf.nn.relu,
                        kernel_initializer=HeReLuNormalInitializer)
                    my_dense = functools.partial(
                        tf.layers.dense,
                        kernel_initializer=HeReLuNormalInitializer)
                    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                        for depth in [96, 256, 384, 384, 256]:
                            x = my_conv(x, depth)
                        y = tf.layers.flatten(x)
                        y = my_dense(y, 4096, tf.nn.relu)
                        y = fc7 = my_dense(y, 4096, tf.nn.relu)
                        y = my_dense(y, 1000)
                        return {'fc7': fc7,
                                self.O_LOGITS: y,
                                self.O_PROBS: tf.nn.softmax(logits=y)}

            return ModelImageNetCNN('imagenet')

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        This test generates a random source and guide and feeds them in a
        randomly initialized CNN. Checks if an adversarial example can get
        at least 50% closer to the guide compared to the original distance of
        the source and the guide.
        """
        tf.set_random_seed(1234)
        input_shape = self.input_shape
        x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
        x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

        layer = 'fc7'
        attack_params = {'eps': 5. / 256, 'clip_min': 0., 'clip_max': 1.,
                         'nb_iter': 10, 'eps_iter': 0.005,
                         'layer': layer}
        x_adv = self.attack.generate(x_src, x_guide, **attack_params)
        h_adv = self.model.fprop(x_adv)[layer]
        h_src = self.model.fprop(x_src)[layer]
        h_guide = self.model.fprop(x_guide)[layer]

        init = tf.global_variables_initializer()
        self.sess.run(init)

        ha, hs, hg, xa, xs, xg = self.sess.run(
            [h_adv, h_src, h_guide, x_adv, x_src, x_guide])

        d_as = np.sqrt(((hs - ha) * (hs - ha)).sum())
        d_ag = np.sqrt(((hg - ha) * (hg - ha)).sum())
        d_sg = np.sqrt(((hg - hs) * (hg - hs)).sum())
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (layer, d_as))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (layer, d_ag))
        print("L2 distance between source and guide `%s`: %.4f" %
              (layer, d_sg))
        print("d_ag/d_sg*100 `%s`: %.4f" % (layer, d_ag * 100 / d_sg))
        self.assertTrue(d_ag * 100 / d_sg < 50.)
class TestFastFeatureAdversaries(CleverHansTest):
    def setUp(self):
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Similar CNN to AlexNet.
            """
            import cleverhans_tutorials.tutorial_models as t_models
            layers = [t_models.Conv2D(96, (3, 3), (2, 2), "VALID"),
                      t_models.ReLU(),
                      t_models.Conv2D(256, (3, 3), (2, 2), "VALID"),
                      t_models.ReLU(),
                      t_models.Conv2D(384, (3, 3), (2, 2), "VALID"),
                      t_models.ReLU(),
                      t_models.Conv2D(384, (3, 3), (2, 2), "VALID"),
                      t_models.ReLU(),
                      t_models.Conv2D(256, (3, 3), (2, 2), "VALID"),
                      t_models.ReLU(),
                      t_models.Flatten(),
                      t_models.Linear(4096),
                      t_models.ReLU(),
                      t_models.Linear(4096),
                      t_models.ReLU(),
                      t_models.Linear(1000),
                      t_models.Softmax()]
            layers[-3].name = 'fc7'
            model = t_models.MLP(layers, input_shape)
            return model

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model)

    def test_attack_strength(self):
        """
        This test generates a random source and guide and feeds them in a
        randomly initialized CNN. Checks if an adversarial example can get
        at least 50% closer to the guide compared to the original distance of
        the source and the guide.
        """
        tf.set_random_seed(1234)
        input_shape = self.input_shape
        x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
        x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

        layer = 'fc7'
        attack_params = {'eps': 5. / 256, 'clip_min': 0., 'clip_max': 1.,
                         'nb_iter': 10, 'eps_iter': 0.005,
                         'layer': layer}
        x_adv = self.attack.generate(x_src, x_guide, **attack_params)
        h_adv = self.model.fprop(x_adv)[layer]
        h_src = self.model.fprop(x_src)[layer]
        h_guide = self.model.fprop(x_guide)[layer]

        init = tf.global_variables_initializer()
        self.sess.run(init)

        ha, hs, hg, xa, xs, xg = self.sess.run(
            [h_adv, h_src, h_guide, x_adv, x_src, x_guide])

        d_as = np.sqrt(((hs - ha) * (hs - ha)).sum())
        d_ag = np.sqrt(((hg - ha) * (hg - ha)).sum())
        d_sg = np.sqrt(((hg - hs) * (hg - hs)).sum())
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (layer, d_as))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (layer, d_ag))
        print("L2 distance between source and guide `%s`: %.4f" %
              (layer, d_sg))
        print("d_ag/d_sg*100 `%s`: %.4f" % (layer, d_ag * 100 / d_sg))
        self.assertTrue(d_ag * 100 / d_sg < 50.)
class TestFastFeatureAdversaries(CleverHansTest):
    def setUp(self):
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Similar CNN to AlexNet.
            """
            class ModelImageNetCNN(Model):
                def __init__(self, scope, nb_classes=1000, **kwargs):
                    del kwargs
                    Model.__init__(self, scope, nb_classes, locals())

                def fprop(self, x, **kwargs):
                    del kwargs
                    my_conv = functools.partial(
                        tf.layers.conv2d, kernel_size=3, strides=2,
                        padding='valid', activation=tf.nn.relu,
                        kernel_initializer=HeReLuNormalInitializer)
                    my_dense = functools.partial(
                        tf.layers.dense,
                        kernel_initializer=HeReLuNormalInitializer)
                    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                        for depth in [96, 256, 384, 384, 256]:
                            x = my_conv(x, depth)
                        y = tf.layers.flatten(x)
                        y = my_dense(y, 4096, tf.nn.relu)
                        y = fc7 = my_dense(y, 4096, tf.nn.relu)
                        y = my_dense(y, 1000)
                        return {'fc7': fc7,
                                self.O_LOGITS: y,
                                self.O_PROBS: tf.nn.softmax(logits=y)}

            return ModelImageNetCNN('imagenet')

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model)

    def test_attack_strength(self):
        """
        This test generates a random source and guide and feeds them in a
        randomly initialized CNN. Checks if an adversarial example can get
        at least 50% closer to the guide compared to the original distance of
        the source and the guide.
        """
        tf.set_random_seed(1234)
        input_shape = self.input_shape
        x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
        x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

        layer = 'fc7'
        attack_params = {'eps': 5. / 256, 'clip_min': 0., 'clip_max': 1.,
                         'nb_iter': 10, 'eps_iter': 0.005,
                         'layer': layer}
        x_adv = self.attack.generate(x_src, x_guide, **attack_params)
        h_adv = self.model.fprop(x_adv)[layer]
        h_src = self.model.fprop(x_src)[layer]
        h_guide = self.model.fprop(x_guide)[layer]

        init = tf.global_variables_initializer()
        self.sess.run(init)

        ha, hs, hg, xa, xs, xg = self.sess.run(
            [h_adv, h_src, h_guide, x_adv, x_src, x_guide])

        d_as = np.sqrt(((hs - ha) * (hs - ha)).sum())
        d_ag = np.sqrt(((hg - ha) * (hg - ha)).sum())
        d_sg = np.sqrt(((hg - hs) * (hg - hs)).sum())
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (layer, d_as))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (layer, d_ag))
        print("L2 distance between source and guide `%s`: %.4f" %
              (layer, d_sg))
        print("d_ag/d_sg*100 `%s`: %.4f" % (layer, d_ag * 100 / d_sg))
        self.assertTrue(d_ag * 100 / d_sg < 50.)
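# A minimal sketch of the imports and runner the test classes above rely on.
# The paths follow the CleverHans v3.x layout (e.g. HeReLuNormalInitializer in
# cleverhans.initializers) and may differ in other versions; this scaffolding
# is an assumption, not part of the original snippets.
import functools
import unittest

import numpy as np
import tensorflow as tf

from cleverhans.attacks import FastFeatureAdversaries
from cleverhans.devtools.checks import CleverHansTest
from cleverhans.initializers import HeReLuNormalInitializer
from cleverhans.model import Model

if __name__ == '__main__':
    unittest.main()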