class TestElasticNetMethod(CleverHansTest):
    """Attack smoke tests run against small two-class models.

    ``setUp`` wraps ``SimpleModel`` in an ``ElasticNetMethod`` attack that the
    first four tests share.  The two high-confidence tests build their own
    ``CarliniWagnerL2`` attack around ``TrivialModel``.
    """

    def setUp(self):
        """Create the session, model and attack shared by the tests."""
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        """Untargeted attack: fewer than 10% of labels may stay unchanged."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        """Targeted attack: more than 90% of outputs must hit the target."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        # One-hot targets.  randint's upper bound is exclusive, so it must be
        # 2 for both classes to be drawn; (0, 1) would always select class 0.
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        target_labs = np.argmax(feed_labs, axis=1)

        self.assertGreater(np.mean(target_labs == new_labs), 0.9)

    def test_generate_gives_adversarial_example(self):
        """Symbolic ``generate``: feeding the true labels must flip >90%."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        # Use the model's own predictions as the one-hot labels to move
        # away from.
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1

        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        # The symbolic output must keep the input's static shape.
        self.assertEqual(x_val.shape, x_adv_p.shape)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial examples must stay inside [clip_min, clip_max]."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        # Bounds are checked with a 1e-3 tolerance on each side.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        """Targeted attack reaches the target with the requested margin."""
        # NOTE(review): this test exercises CarliniWagnerL2, not the
        # ElasticNetMethod built in setUp -- confirm that is intended.
        trivial_model = TrivialModel()

        for confidence in [0, 2.3]:
            x_val = np.array(np.random.rand(10, 1) - .5, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            target_labs = np.argmax(feed_labs, axis=1)

            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=confidence,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            # Logit of the target class versus the other class (two classes).
            good_labs = new_labs[np.arange(10), target_labs]
            bad_labs = new_labs[np.arange(10), 1 - target_labs]

            # The smallest margin should sit right at the confidence level.
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + confidence)),
                           atol=1e-1))
            self.assertGreater(
                np.mean(np.argmax(new_labs, axis=1) == target_labs), .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        """Untargeted attack flips every label with the requested margin."""
        # NOTE(review): this test exercises CarliniWagnerL2, not the
        # ElasticNetMethod built in setUp -- confirm that is intended.
        trivial_model = TrivialModel()

        for confidence in [0, 2.3]:
            x_val = np.array(np.random.rand(10, 1) - .5, dtype=np.float32)

            orig_labs = np.argmax(
                self.sess.run(trivial_model.get_logits(x_val)), axis=1)

            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=confidence,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            # Logit of the new (other) class versus the original class.
            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            # No example may keep its original label.
            self.assertEqual(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs), 0)
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + confidence)),
                           atol=1e-1))
class TestElasticNetMethod(CleverHansTest):
    """Attack smoke tests run against small two-class TensorFlow functions.

    ``setUp`` wraps a fixed-weight two-layer network in an
    ``ElasticNetMethod`` attack that the first four tests share.  The two
    high-confidence tests build their own ``CarliniWagnerL2`` attack around
    a single-matmul model.
    """

    def setUp(self):
        """Create the session, model function and attack shared by the tests."""
        super(TestElasticNetMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        """Untargeted attack: fewer than 10% of labels may stay unchanged."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        """Targeted attack: more than 90% of outputs must hit the target."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        # One-hot targets.  randint's upper bound is exclusive, so it must be
        # 2 for both classes to be drawn; (0, 1) would always select class 0.
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        target_labs = np.argmax(feed_labs, axis=1)

        self.assertGreater(np.mean(target_labs == new_labs), 0.9)

    def test_generate_gives_adversarial_example(self):
        """Symbolic ``generate``: feeding the true labels must flip >90%."""
        import tensorflow as tf

        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        # Use the model's own predictions as the one-hot labels to move
        # away from.
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1

        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial examples must stay inside [clip_min, clip_max]."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        # Bounds are checked with a 1e-3 tolerance on each side.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        """Targeted attack reaches the target with the requested margin."""
        import tensorflow as tf

        # NOTE(review): this test exercises CarliniWagnerL2, not the
        # ElasticNetMethod built in setUp -- confirm that is intended.
        def trivial_model(x):
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for confidence in [0, 2.3]:
            x_val = np.array(np.random.rand(10, 1) - .5, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            target_labs = np.argmax(feed_labs, axis=1)

            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=confidence,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            # Logit of the target class versus the other class (two classes).
            good_labs = new_labs[np.arange(10), target_labs]
            bad_labs = new_labs[np.arange(10), 1 - target_labs]

            # The smallest margin should sit right at the confidence level.
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + confidence)),
                           atol=1e-1))
            self.assertGreater(
                np.mean(np.argmax(new_labs, axis=1) == target_labs), .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        """Untargeted attack flips every label with the requested margin."""
        import tensorflow as tf

        # NOTE(review): this test exercises CarliniWagnerL2, not the
        # ElasticNetMethod built in setUp -- confirm that is intended.
        def trivial_model(x):
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for confidence in [0, 2.3]:
            x_val = np.array(np.random.rand(10, 1) - .5, dtype=np.float32)

            orig_labs = np.argmax(
                self.sess.run(trivial_model(x_val)), axis=1)

            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=confidence,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            # Logit of the new (other) class versus the original class.
            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            # No example may keep its original label.
            self.assertEqual(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs), 0)
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + confidence)),
                           atol=1e-1))
'binary_search_steps':3, 'initial_const':1, 'clip_min':0, 'clip_max':1, 'batch_size':100, 'rnd': orders, 'y_target':labels, } from cleverhans.utils_keras import KerasModelWrapper keras.backend.set_learning_phase(0) sess = keras.backend.get_session() models = [KerasModelWrapper(model) for model in models] attack = ElasticNetMethod(models, sess=sess) x_adv = attack.generate_np(x_val,**bapp_params) # orig_labs = np.argmax(model.predict(x_val), axis=1) # new_labs = np.argmax(model.predict(x_adv), axis=1) l1dist = np.linalg.norm(x_val-x_adv, ord=1, axis=-1) # l1dist = np.sum(np.absolute(x_adv-x_val, axis=-1)) print(np.mean(l1dist), np.max(l1dist), np.min(l1dist)) # print('normal mnist model acc:', np.mean(orig_labs==labels)) # print('advs mnist model acc:', np.mean(new_labs==labels)) # print('advs acc:', new_labs[orig_labs==labels] != labels[orig_labs==labels]) np.save('advs/'+conf[:-5].split('/')[-1]+'_'+str(target)+'_ead_show.npy', x_adv) # x_adv = self.attack.generate_np(x_val, max_iterations=100, # binary_search_steps=3, # initial_const=1, # clip_min=-5, clip_max=5, # batch_size=100, y_target=feed_labs)
# Report the accuracy computed just before this point.
print("After attack, the accuracy is: {}".format(adv_acc * 100))

# In[35]:

# Wrap the Keras model and set up the elastic-net attack on it.
wrap = KerasModelWrapper(model)
en = ElasticNetMethod(wrap, sess=sess)

# Keyword arguments forwarded verbatim to ``generate_np``.
en_params = dict(
    beta=0.01,
    decision_rule='L1',
    batch_size=1000,
    confidence=0,
    learning_rate=0.1,
    binary_search_steps=9,
    max_iterations=10,
    abort_early=True,
    initial_const=0.01,
    clip_min=0,
    clip_max=1,
)

# A new axis is inserted at position 1 of x_test before attacking.
adv_x = en.generate_np(x_test[:, None, :, :], **en_params)

# In[37]:

# Check the accuracy of the model on the generated adversarial examples.
adv_conf = model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_acc = np.mean(np.equal(adv_pred, y_test))
print("After attack, the accuracy is: {}".format(adv_acc * 100))
class TestElasticNetMethod(CleverHansTest):
    """Attack smoke tests run against small two-class models.

    ``setUp`` wraps ``SimpleModel`` in an ``ElasticNetMethod`` attack that the
    first four tests share.  The two high-confidence tests build their own
    ``CarliniWagnerL2`` attack around ``TrivialModel``.
    """

    def setUp(self):
        """Create the session, model and attack shared by the tests."""
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        """Untargeted attack: fewer than 10% of labels may stay unchanged."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        """Targeted attack: more than 90% of outputs must hit the target."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        # One-hot targets.  randint's upper bound is exclusive, so it must be
        # 2 for both classes to be drawn; (0, 1) would always select class 0.
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        target_labs = np.argmax(feed_labs, axis=1)

        self.assertGreater(np.mean(target_labs == new_labs), 0.9)

    def test_generate_gives_adversarial_example(self):
        """Symbolic ``generate``: feeding the true labels must flip >90%."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        # Use the model's own predictions as the one-hot labels to move
        # away from.
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1

        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial examples must stay inside [clip_min, clip_max]."""
        x_val = np.array(np.random.rand(100, 2), dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        # Bounds are checked with a 1e-3 tolerance on each side.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        """Targeted attack reaches the target with the requested margin."""
        # NOTE(review): this test exercises CarliniWagnerL2, not the
        # ElasticNetMethod built in setUp -- confirm that is intended.
        trivial_model = TrivialModel()

        for confidence in [0, 2.3]:
            x_val = np.array(np.random.rand(10, 1) - .5, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            target_labs = np.argmax(feed_labs, axis=1)

            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=confidence,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            # Logit of the target class versus the other class (two classes).
            good_labs = new_labs[np.arange(10), target_labs]
            bad_labs = new_labs[np.arange(10), 1 - target_labs]

            # The smallest margin should sit right at the confidence level.
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + confidence)),
                           atol=1e-1))
            self.assertGreater(
                np.mean(np.argmax(new_labs, axis=1) == target_labs), .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        """Untargeted attack flips every label with the requested margin."""
        # NOTE(review): this test exercises CarliniWagnerL2, not the
        # ElasticNetMethod built in setUp -- confirm that is intended.
        trivial_model = TrivialModel()

        for confidence in [0, 2.3]:
            x_val = np.array(np.random.rand(10, 1) - .5, dtype=np.float32)

            orig_labs = np.argmax(
                self.sess.run(trivial_model.get_logits(x_val)), axis=1)

            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=confidence,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            # Logit of the new (other) class versus the original class.
            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            # No example may keep its original label.
            self.assertEqual(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs), 0)
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + confidence)),
                           atol=1e-1))