def attack(model, session, a):
  """Run FGSM against a single example and return the adversarial image.

  :param model: the model under attack
  :param session: the TF session the attack graph should run in
  :param a: an adversarial-example object exposing `original_image`
  :return: the perturbed image produced by `FastGradientMethod.generate_np`
  """
  method = FastGradientMethod(model, sess=session)
  # generate_np expects a batch, so add a leading batch axis of size 1.
  batch = a.original_image[np.newaxis]
  return method.generate_np(batch)
class CommonAttackProperties(CleverHansTest):
  """
  Abstract base class shared by the tests for many attacks that want
  to check the same properties.
  """

  def setUp(self):
    # Inheritance doesn't really work with tests.
    # nosetests always wants to run this class because it is a
    # CleverHansTest subclass, but this class is meant to just
    # be abstract.
    # Before this class was the tests for FastGradientMethod but
    # people kept inheriting from it for other attacks so it was
    # impossible to write tests specifically for FastGradientMethod.
    # pylint: disable=unidiomatic-typecheck
    if type(self) is CommonAttackProperties:
      raise SkipTest()

    super(CommonAttackProperties, self).setUp()
    self.sess = tf.Session()
    self.model = SimpleModel()

  def generate_adversarial_examples_np(self, ord, eps, **kwargs):
    """
    Generates adversarial examples and returns them, along with the
    per-example perturbation size measured in the requested norm.

    :param ord: norm order passed to the attack (np.inf, 1, or 2)
    :param eps: maximum perturbation size passed to the attack
    :param kwargs: additional arguments forwarded to `generate_np`
    :return: tuple of (clean inputs, adversarial inputs, per-example
      norm of the perturbation)
    :raises ValueError: if `ord` is not one of np.inf, 1, 2
    """
    x_val = np.random.rand(100, 2)
    x_val = np.array(x_val, dtype=np.float32)

    x_adv = self.attack.generate_np(x_val, eps=eps, ord=ord,
                                    clip_min=-5, clip_max=5, **kwargs)
    if ord == np.inf:
      delta = np.max(np.abs(x_adv - x_val), axis=1)
    elif ord == 1:
      delta = np.sum(np.abs(x_adv - x_val), axis=1)
    elif ord == 2:
      delta = np.sum(np.square(x_adv - x_val), axis=1) ** .5
    else:
      # Previously an unsupported `ord` fell through and caused an
      # UnboundLocalError on `delta`; fail with a clear message instead.
      raise ValueError("Unsupported ord: " + str(ord))

    return x_val, x_adv, delta

  def help_generate_np_gives_adversarial_example(self, ord, eps=.5,
                                                 **kwargs):
    """
    Checks that the attack saturates the eps budget in the given norm
    and flips the model's prediction on most examples.
    """
    x_val, x_adv, delta = self.generate_adversarial_examples_np(ord, eps,
                                                                **kwargs)
    # The perturbation should use (almost exactly) the whole eps budget.
    self.assertLess(np.max(np.abs(delta - eps)), 1e-3)
    orig_labs = np.argmax(self.sess.run(self.model.get_logits(x_val)),
                          axis=1)
    new_labs = np.argmax(self.sess.run(self.model.get_logits(x_adv)),
                         axis=1)
    # Most labels should change (accuracy on adversarial inputs < 50%).
    self.assertLess(np.mean(orig_labs == new_labs), .5)

  def test_invalid_input(self):
    """Inputs outside [clip_min, clip_max] should trigger a sanity check."""
    x_val = -np.ones((2, 2), dtype='float32')
    with self.assertRaises(tf.errors.InvalidArgumentError) as context:
      self.attack.generate_np(x_val, eps=1., clip_min=0., clip_max=1.)
    self.assertTrue(context.exception)

  def test_generate_np_gives_adversarial_example_linfinity(self):
    # np.inf rather than the deprecated np.infty alias (removed in
    # NumPy 2.0); the value is identical.
    self.help_generate_np_gives_adversarial_example(np.inf)

  def test_generate_np_gives_adversarial_example_l1(self):
    self.help_generate_np_gives_adversarial_example(1)

  def test_generate_np_gives_adversarial_example_l2(self):
    self.help_generate_np_gives_adversarial_example(2)

  def test_generate_respects_dtype(self):
    """The generated graph should honor the attack's dtypestr setting."""
    self.attack = FastGradientMethod(self.model, sess=self.sess,
                                     dtypestr='float64')
    x = tf.placeholder(dtype=tf.float64, shape=(100, 2))
    x_adv = self.attack.generate(x)
    self.assertEqual(x_adv.dtype, tf.float64)

  def test_targeted_generate_np_gives_adversarial_example(self):
    """A targeted attack should steer most predictions to the target."""
    # randint(0, 2) draws from {0, 1}, matching the removed
    # np.random.random_integers(0, 1) (which was endpoint-inclusive).
    random_labs = np.random.randint(0, 2, 100)
    random_labs_one_hot = np.zeros((100, 2))
    random_labs_one_hot[np.arange(100), random_labs] = 1

    try:
      _, x_adv, delta = self.generate_adversarial_examples_np(
          eps=.5, ord=np.inf, y_target=random_labs_one_hot)
    except NotImplementedError:
      raise SkipTest()

    self.assertLessEqual(np.max(delta), 0.5001)

    new_labs = np.argmax(self.sess.run(self.model.get_logits(x_adv)),
                         axis=1)
    self.assertTrue(np.mean(random_labs == new_labs) > 0.7)

  def test_generate_np_can_be_called_with_different_eps(self):
    x_val = np.random.rand(100, 2)
    x_val = np.array(x_val, dtype=np.float32)

    for eps in [0.1, 0.2, 0.3, 0.4]:
      x_adv = self.attack.generate_np(x_val, eps=eps, ord=np.inf,
                                      clip_min=-5.0, clip_max=5.0)

      delta = np.max(np.abs(x_adv - x_val), axis=1)
      self.assertLessEqual(np.max(delta), eps + 1e-4)

  def test_generate_can_be_called_with_different_eps(self):
    # It is critical that this test uses generate and not generate_np.
    # All the other tests use generate_np. Even though generate_np calls
    # generate, it does so in a very standardized way, e.g. with eps
    # always converted to a tensorflow placeholder, so the other tests
    # based on generate_np do not exercise the generate API very well.
    x_val = np.random.rand(100, 2)
    x_val = np.array(x_val, dtype=np.float32)
    x = tf.placeholder(tf.float32, x_val.shape)

    for eps in [0.1, 0.2, 0.3, 0.4]:
      x_adv = self.attack.generate(x, eps=eps, ord=np.inf,
                                   clip_min=-5.0, clip_max=5.0)
      x_adv = self.sess.run(x_adv, feed_dict={x: x_val})

      delta = np.max(np.abs(x_adv - x_val), axis=1)
      self.assertLessEqual(np.max(delta), eps + 1e-4)

  def test_generate_np_clip_works_as_expected(self):
    """With a tight clip range, the output should span exactly that range."""
    x_val = np.random.rand(100, 2)
    x_val = np.array(x_val, dtype=np.float32)

    x_adv = self.attack.generate_np(x_val, eps=0.5, ord=np.inf,
                                    clip_min=-0.2, clip_max=0.1,
                                    sanity_checks=False)

    self.assertClose(np.min(x_adv), -0.2)
    self.assertClose(np.max(x_adv), 0.1)