Beispiel #1
0
    def setUp(self):
        """
        Prepare the fixtures shared by the tests: the input shape, a
        TensorFlow session, the CNN built from the tutorial layers and the
        FastFeatureAdversaries attack wrapping that CNN.
        """
        super(TestFastFeatureAdversaries, self).setUp()
        import tensorflow as tf

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Assemble a CNN loosely modelled on AlexNet out of the layer
            classes in cleverhans_tutorials.tutorial_models and wrap it in
            an MLP container.
            """
            import cleverhans_tutorials.tutorial_models as t_models

            stack = []
            # Convolutional trunk: each Conv2D is followed by a ReLU.
            for n_filters in (96, 256, 384, 384, 256):
                stack.append(
                    t_models.Conv2D(n_filters, (3, 3), (2, 2), "VALID"))
                stack.append(t_models.ReLU())
            stack.append(t_models.Flatten())
            # Two 4096-unit linear layers, each followed by a ReLU.
            for _ in range(2):
                stack.append(t_models.Linear(4096))
                stack.append(t_models.ReLU())
            stack.append(t_models.Linear(1000))
            stack.append(t_models.Softmax())

            # The third entry from the end is the ReLU that follows the
            # second 4096-unit linear layer; the tests address it as 'fc7'.
            stack[-3].name = 'fc7'

            return t_models.MLP(stack, input_shape)

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model)
def main(argv):
    """
    Run FastFeatureAdversaries on random inputs and print feature distances.

    Two random batches of shape [FLAGS.batch_size, 224, 224, 3] act as the
    source and guide images. The model returned by make_imagenet_cnn is
    attacked at layer FLAGS.layer for FLAGS.nb_iter iterations, and the L2
    distances between the source, guide and adversarial activations at that
    layer are printed together with the largest input perturbation.

    :param argv: not used by the body; accepted so the function can be
                 called by a launcher that passes command-line arguments
                 (presumably tf.app.run -- confirm against the caller).
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    input_shape = [FLAGS.batch_size, 224, 224, 3]
    x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
    x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))
    print("Input shape:")
    print(input_shape)

    model = make_imagenet_cnn(input_shape)
    print("Model:")
    for i, layer in enumerate(model.layers):
        print('%s %s' % (model.layer_names[i], layer.output_shape))
    attack = FastFeatureAdversaries(model)
    attack_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': FLAGS.nb_iter,
        'eps_iter': 0.01,
        'layer': FLAGS.layer
    }
    x_adv = attack.generate(x_src, x_guide, **attack_params)
    h_adv = model.fprop(x_adv)[FLAGS.layer]
    h_src = model.fprop(x_src)[FLAGS.layer]
    h_guide = model.fprop(x_guide)[FLAGS.layer]

    def l2_distance(a, b):
        """Return the Euclidean distance between two arrays."""
        diff = a - b
        return np.sqrt((diff * diff).sum())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Everything is fetched in a single run so that all values derive
        # from the same draw of the random source/guide tensors.
        ha, hs, hg, xa, xs = sess.run([h_adv, h_src, h_guide, x_adv, x_src])

        # The square root is required for these to be L2 distances; without
        # it the printed numbers are squared distances.
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (FLAGS.layer, l2_distance(hs, ha)))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (FLAGS.layer, l2_distance(hg, ha)))
        print("L2 distance between source and guide `%s`: %.4f" %
              (FLAGS.layer, l2_distance(hg, hs)))
        print("Maximum perturbation: %.4f" % np.abs((xa - xs)).max())
        print("Original features: ")
        print(hs[:10, :10])
        print("Adversarial features: ")
        print(ha[:10, :10])
    def setUp(self):
        """
        Prepare the fixtures shared by the tests: the input shape, a
        TensorFlow session, the CNN model and a FastFeatureAdversaries
        attack bound to that model and session.
        """
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Return a CNN loosely modelled on AlexNet.

            `input_shape` is accepted but not read by this builder.
            """
            class ModelImageNetCNN(Model):
                def __init__(self, scope, nb_classes=1000, **kwargs):
                    # kwargs is dropped first so that locals() below only
                    # carries the remaining constructor arguments.
                    del kwargs
                    Model.__init__(self, scope, nb_classes, locals())

                def fprop(self, x, **kwargs):
                    """Build the forward graph; returns a dict of tensors."""
                    del kwargs
                    conv_relu = functools.partial(
                        tf.layers.conv2d,
                        kernel_size=3,
                        strides=2,
                        padding='valid',
                        activation=tf.nn.relu,
                        kernel_initializer=HeReLuNormalInitializer)
                    dense = functools.partial(
                        tf.layers.dense,
                        kernel_initializer=HeReLuNormalInitializer)

                    # AUTO_REUSE lets repeated fprop calls share variables.
                    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                        net = x
                        for n_filters in (96, 256, 384, 384, 256):
                            net = conv_relu(net, n_filters)
                        net = tf.layers.flatten(net)
                        net = dense(net, 4096, tf.nn.relu)
                        fc7 = dense(net, 4096, tf.nn.relu)
                        logits = dense(fc7, 1000)

                        outputs = {'fc7': fc7}
                        outputs[self.O_LOGITS] = logits
                        outputs[self.O_PROBS] = tf.nn.softmax(logits=logits)
                        return outputs

            return ModelImageNetCNN('imagenet')

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model, sess=self.sess)
Beispiel #4
0
    def setUp(self):
        """
        Create the per-test fixtures: input shape, TensorFlow session, the
        CNN model and the FastFeatureAdversaries attack wrapping the model.
        """
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Return a CNN loosely modelled on AlexNet.

            `input_shape` is accepted but not read by this builder.
            """

            class ModelImageNetCNN(Model):
                def __init__(self, scope, nb_classes=1000, **kwargs):
                    # Discard kwargs before locals() is captured below.
                    del kwargs
                    Model.__init__(self, scope, nb_classes, locals())

                def fprop(self, x, **kwargs):
                    """Build the forward graph; returns a dict of tensors."""
                    del kwargs
                    init = HeReLuNormalInitializer
                    conv_block = functools.partial(tf.layers.conv2d,
                                                   kernel_size=3,
                                                   strides=2,
                                                   padding='valid',
                                                   activation=tf.nn.relu,
                                                   kernel_initializer=init)
                    fully_connected = functools.partial(
                        tf.layers.dense, kernel_initializer=init)

                    # AUTO_REUSE lets repeated fprop calls share variables.
                    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                        features = x
                        for channels in (96, 256, 384, 384, 256):
                            features = conv_block(features, channels)
                        hidden = tf.layers.flatten(features)
                        hidden = fully_connected(hidden, 4096, tf.nn.relu)
                        fc7 = fully_connected(hidden, 4096, tf.nn.relu)
                        logits = fully_connected(fc7, 1000)
                        return {
                            'fc7': fc7,
                            self.O_LOGITS: logits,
                            self.O_PROBS: tf.nn.softmax(logits=logits),
                        }

            return ModelImageNetCNN('imagenet')

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model)
Beispiel #5
0
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):
    """
    Attack a classifier on a test subset and print accuracy figures.

    The first 256 test images of the chosen dataset are scaled by 1/255 and
    attacked with the chosen CleverHans attack. Adversarial results that
    contain NaNs or are identical to their input are discarded. The model
    is then evaluated on the clean subset and on the remaining adversarial
    examples.

    :param type: 'End2End' builds the model with prepare_GBP_End2End; any
                 other value uses prepare_Resnet. The name shadows the
                 builtin but is part of the public signature.
    :param dataset: 'CIFAR10' or 'CIFAR100'; any other value loads SVHN
                    from "./Data/".
    :param attack_type: one of "FGM", "CWL2", "DF", "ENM", "FFA", "LBFGS",
                        "MEA", "MIM", "SMM", "SPSA", "VATM", "VAM".
    :raises ValueError: if `attack_type` is not one of the values above.
    """
    size = 256
    eval_params = {'batch_size': 128}

    ############################## Prepare the data ##############################

    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
    # Every dataset handled above uses the same spatial size.
    input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # Only this slice is attacked and evaluated. It can hold fewer than
    # `size` rows if the test set is small.
    x_eval = x_test[:size]
    y_eval = y_test[:size]

    ####################### Build the model and the attack #######################

    # Parameter sets shared by several attacks.
    linf_params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
    optimizer_params = {
        'confidence': 0.9,
        'batch_size': 128,
        'learning_rate': 0.005,
    }

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        # Records (input, logits) for every graph the wrapper builds; the
        # first pair is reused for evaluation further down.
        input_output = []

        def build_logits(inputs):
            """Build the model on `inputs` and return its logits."""
            prepare = prepare_GBP_End2End if type == 'End2End' else prepare_Resnet
            if not input_output:
                # First build: creates the variables and is given the
                # session and checkpoint directory.
                _, tf_model = prepare(
                    num_classes,
                    inputT=inputs, sess=sess,
                    checkpoint_dir='./{}_{}/'.format(dataset, type),
                    reuse=False)
            else:
                # Later builds share the existing variables.
                _, tf_model = prepare(num_classes, inputT=inputs, reuse=True)

            input_output.append(inputs)
            input_output.append(tf_model.logits)
            return tf_model.logits

        # create an attackable model for the cleverhans
        model = CallableModelWrapper(build_logits, 'logits')

        # TODO: check the configurations
        # NOTE(review): the parameter sets below are carried over unchanged;
        # whether each attack accepts every key (e.g. 'eps' for LBFGS or
        # SaliencyMapMethod) has not been verified here.
        if attack_type == "FGM":  # pass
            attack_cls, params = FastGradientMethod, dict(linf_params)
        elif attack_type == "CWL2":  # pass
            attack_cls, params = CarliniWagnerL2, dict(optimizer_params)
        elif attack_type == "DF":  # pass
            attack_cls, params = DeepFool, {}
        elif attack_type == "ENM":  # configurations checked, quickly tested
            attack_cls, params = ElasticNetMethod, dict(optimizer_params)
        elif attack_type == "FFA":  # configuration checked
            # NOTE(review): other FastFeatureAdversaries usages pass a guide
            # image and a 'layer' to generate(); confirm this call works.
            attack_cls = FastFeatureAdversaries
            params = dict(linf_params, eps_iter=0.005)
        elif attack_type == "LBFGS":
            attack_cls, params = LBFGS, dict(linf_params)
        elif attack_type == "MEA":
            attack_cls, params = MadryEtAl, dict(linf_params)
        elif attack_type == "MIM":
            attack_cls, params = MomentumIterativeMethod, dict(linf_params)
        elif attack_type == "SMM":
            attack_cls, params = SaliencyMapMethod, dict(linf_params)
        elif attack_type == "SPSA":
            attack_cls, params = SPSA, dict(linf_params)
        elif attack_type == "VATM":
            # NOTE(review): `vatm` is lowercase unlike every other attack
            # class; confirm it is constructible like an Attack.
            attack_cls, params = vatm, dict(linf_params)
        elif attack_type == "VAM":
            attack_cls, params = VirtualAdversarialMethod, dict(linf_params)
        else:
            # The old message promised a fallback to FGM that never
            # happened; the function has always raised here.
            raise ValueError(
                "Unrecognized attack type: {!r}".format(attack_type))

        attack = attack_cls(model, back='tf', sess=sess)

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_eval})

        # "adv_vals" may contain NaNs when the attack fails, and the input
        # may come back unperturbed; both cases are discarded.
        to_delete = [
            idx for idx, adv in enumerate(adv_vals)
            if np.isnan(adv).any() or np.array_equiv(adv, x_eval[idx])
        ]

        # cleanings
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_eval, to_delete, axis=0)
        y_cleaned = np.delete(y_eval, to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        # Report against the number of inputs actually attacked, which is
        # smaller than `size` when the test set has fewer rows.
        print("{} out of {} adversarial examples are generated.".format(
            len(adv_vals_cleaned), len(x_eval)))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori))
                     for adv, ori in zip(adv_vals_cleaned, ori_cleaned)])))

        # TODO: visualize the adv_vals

        # Build the probability op once and reuse it for both evaluations.
        model_input = input_output[0]
        probs = tf.nn.softmax(input_output[1])

        accuracy = model_eval(sess, model_input, y, probs, x_eval, y_eval,
                              args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, model_input, y, probs,
                              adv_vals_cleaned, y_cleaned,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
class TestFastFeatureAdversaries(CleverHansTest):
    """Checks the strength of FastFeatureAdversaries on a random CNN."""

    def setUp(self):
        """
        Create the per-test fixtures: input shape, TensorFlow session, the
        CNN model and a FastFeatureAdversaries attack bound to both.
        """
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Similar CNN to AlexNet.

            `input_shape` is accepted but not read by this builder.
            """
            class ModelImageNetCNN(Model):
                def __init__(self, scope, nb_classes=1000, **kwargs):
                    # Discard kwargs before locals() is captured below.
                    del kwargs
                    Model.__init__(self, scope, nb_classes, locals())

                def fprop(self, x, **kwargs):
                    """Build the forward graph; returns a dict of tensors."""
                    del kwargs
                    my_conv = functools.partial(
                        tf.layers.conv2d,
                        kernel_size=3,
                        strides=2,
                        padding='valid',
                        activation=tf.nn.relu,
                        kernel_initializer=HeReLuNormalInitializer)
                    my_dense = functools.partial(
                        tf.layers.dense,
                        kernel_initializer=HeReLuNormalInitializer)
                    # AUTO_REUSE lets repeated fprop calls share variables.
                    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                        for depth in [96, 256, 384, 384, 256]:
                            x = my_conv(x, depth)
                        y = tf.layers.flatten(x)
                        y = my_dense(y, 4096, tf.nn.relu)
                        y = fc7 = my_dense(y, 4096, tf.nn.relu)
                        y = my_dense(y, 1000)
                        return {
                            'fc7': fc7,
                            self.O_LOGITS: y,
                            self.O_PROBS: tf.nn.softmax(logits=y)
                        }

            return ModelImageNetCNN('imagenet')

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        # Close the session when the test ends, pass or fail, so that its
        # resources are not leaked across tests.
        self.addCleanup(self.sess.close)
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        This test generates a random source and guide and feeds them in a
        randomly initialized CNN. Checks if an adversarial example can get
        at least 50% closer to the guide compared to the original distance of
        the source and the guide.
        """
        tf.set_random_seed(1234)
        input_shape = self.input_shape
        x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
        x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

        layer = 'fc7'
        attack_params = {
            'eps': 5. / 256,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 10,
            'eps_iter': 0.005,
            'layer': layer
        }
        x_adv = self.attack.generate(x_src, x_guide, **attack_params)
        h_adv = self.model.fprop(x_adv)[layer]
        h_src = self.model.fprop(x_src)[layer]
        h_guide = self.model.fprop(x_guide)[layer]

        self.sess.run(tf.global_variables_initializer())

        # Only the activations are needed. They are fetched in one run so
        # that they all derive from the same random source/guide draw.
        ha, hs, hg = self.sess.run([h_adv, h_src, h_guide])
        d_as = np.sqrt(((hs - ha) * (hs - ha)).sum())
        d_ag = np.sqrt(((hg - ha) * (hg - ha)).sum())
        d_sg = np.sqrt(((hg - hs) * (hg - hs)).sum())
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (layer, d_as))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (layer, d_ag))
        print("L2 distance between source and guide `%s`: %.4f" %
              (layer, d_sg))
        print("d_ag/d_sg*100 `%s`: %.4f" % (layer, d_ag * 100 / d_sg))
        # assertLess reports both operands on failure.
        self.assertLess(d_ag * 100 / d_sg, 50.)
class TestFastFeatureAdversaries(CleverHansTest):
    """Checks the strength of FastFeatureAdversaries on a random CNN."""

    def setUp(self):
        """
        Create the per-test fixtures: input shape, TensorFlow session, the
        CNN built from the tutorial layers and the attack wrapping it.
        """
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Similar CNN to AlexNet.
            """
            import cleverhans_tutorials.tutorial_models as t_models
            layers = [
                t_models.Conv2D(96, (3, 3), (2, 2), "VALID"),
                t_models.ReLU(),
                t_models.Conv2D(256, (3, 3), (2, 2), "VALID"),
                t_models.ReLU(),
                t_models.Conv2D(384, (3, 3), (2, 2), "VALID"),
                t_models.ReLU(),
                t_models.Conv2D(384, (3, 3), (2, 2), "VALID"),
                t_models.ReLU(),
                t_models.Conv2D(256, (3, 3), (2, 2), "VALID"),
                t_models.ReLU(),
                t_models.Flatten(),
                t_models.Linear(4096),
                t_models.ReLU(),
                t_models.Linear(4096),
                t_models.ReLU(),
                t_models.Linear(1000),
                t_models.Softmax()
            ]
            # The third entry from the end is the ReLU after the second
            # 4096-unit linear layer; the test addresses it as 'fc7'.
            layers[-3].name = 'fc7'

            model = t_models.MLP(layers, input_shape)
            return model

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        # Close the session when the test ends, pass or fail, so that its
        # resources are not leaked across tests.
        self.addCleanup(self.sess.close)
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model)

    def test_attack_strength(self):
        """
        This test generates a random source and guide and feeds them in a
        randomly initialized CNN. Checks if an adversarial example can get
        at least 50% closer to the guide compared to the original distance of
        the source and the guide.
        """
        tf.set_random_seed(1234)
        input_shape = self.input_shape
        x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
        x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

        layer = 'fc7'
        attack_params = {
            'eps': 5. / 256,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 10,
            'eps_iter': 0.005,
            'layer': layer
        }
        x_adv = self.attack.generate(x_src, x_guide, **attack_params)
        h_adv = self.model.fprop(x_adv)[layer]
        h_src = self.model.fprop(x_src)[layer]
        h_guide = self.model.fprop(x_guide)[layer]

        self.sess.run(tf.global_variables_initializer())

        # Only the activations are needed. They are fetched in one run so
        # that they all derive from the same random source/guide draw.
        ha, hs, hg = self.sess.run([h_adv, h_src, h_guide])
        d_as = np.sqrt(((hs - ha) * (hs - ha)).sum())
        d_ag = np.sqrt(((hg - ha) * (hg - ha)).sum())
        d_sg = np.sqrt(((hg - hs) * (hg - hs)).sum())
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (layer, d_as))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (layer, d_ag))
        print("L2 distance between source and guide `%s`: %.4f" %
              (layer, d_sg))
        print("d_ag/d_sg*100 `%s`: %.4f" % (layer, d_ag * 100 / d_sg))
        # assertLess reports both operands on failure.
        self.assertLess(d_ag * 100 / d_sg, 50.)
Beispiel #8
0
class TestFastFeatureAdversaries(CleverHansTest):
    """Checks the strength of FastFeatureAdversaries on a random CNN."""

    def setUp(self):
        """
        Create the per-test fixtures: input shape, TensorFlow session, the
        CNN model and the FastFeatureAdversaries attack wrapping the model.
        """
        super(TestFastFeatureAdversaries, self).setUp()

        def make_imagenet_cnn(input_shape=(None, 224, 224, 3)):
            """
            Similar CNN to AlexNet.

            `input_shape` is accepted but not read by this builder.
            """

            class ModelImageNetCNN(Model):
                def __init__(self, scope, nb_classes=1000, **kwargs):
                    # Discard kwargs before locals() is captured below.
                    del kwargs
                    Model.__init__(self, scope, nb_classes, locals())

                def fprop(self, x, **kwargs):
                    """Build the forward graph; returns a dict of tensors."""
                    del kwargs
                    my_conv = functools.partial(tf.layers.conv2d,
                                                kernel_size=3,
                                                strides=2,
                                                padding='valid',
                                                activation=tf.nn.relu,
                                                kernel_initializer=HeReLuNormalInitializer)
                    my_dense = functools.partial(tf.layers.dense,
                                                 kernel_initializer=HeReLuNormalInitializer)
                    # AUTO_REUSE lets repeated fprop calls share variables.
                    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
                        for depth in [96, 256, 384, 384, 256]:
                            x = my_conv(x, depth)
                        y = tf.layers.flatten(x)
                        y = my_dense(y, 4096, tf.nn.relu)
                        y = fc7 = my_dense(y, 4096, tf.nn.relu)
                        y = my_dense(y, 1000)
                        return {'fc7': fc7,
                                self.O_LOGITS: y,
                                self.O_PROBS: tf.nn.softmax(logits=y)}

            return ModelImageNetCNN('imagenet')

        self.input_shape = [10, 224, 224, 3]
        self.sess = tf.Session()
        # Close the session when the test ends, pass or fail, so that its
        # resources are not leaked across tests.
        self.addCleanup(self.sess.close)
        self.model = make_imagenet_cnn(self.input_shape)
        self.attack = FastFeatureAdversaries(self.model)

    def test_attack_strength(self):
        """
        This test generates a random source and guide and feeds them in a
        randomly initialized CNN. Checks if an adversarial example can get
        at least 50% closer to the guide compared to the original distance of
        the source and the guide.
        """
        tf.set_random_seed(1234)
        input_shape = self.input_shape
        x_src = tf.abs(tf.random_uniform(input_shape, 0., 1.))
        x_guide = tf.abs(tf.random_uniform(input_shape, 0., 1.))

        layer = 'fc7'
        attack_params = {'eps': 5./256, 'clip_min': 0., 'clip_max': 1.,
                         'nb_iter': 10, 'eps_iter': 0.005,
                         'layer': layer}
        x_adv = self.attack.generate(x_src, x_guide, **attack_params)
        h_adv = self.model.fprop(x_adv)[layer]
        h_src = self.model.fprop(x_src)[layer]
        h_guide = self.model.fprop(x_guide)[layer]

        self.sess.run(tf.global_variables_initializer())

        # Only the activations are needed. They are fetched in one run so
        # that they all derive from the same random source/guide draw.
        ha, hs, hg = self.sess.run([h_adv, h_src, h_guide])
        d_as = np.sqrt(((hs-ha)*(hs-ha)).sum())
        d_ag = np.sqrt(((hg-ha)*(hg-ha)).sum())
        d_sg = np.sqrt(((hg-hs)*(hg-hs)).sum())
        print("L2 distance between source and adversarial example `%s`: %.4f" %
              (layer, d_as))
        print("L2 distance between guide and adversarial example `%s`: %.4f" %
              (layer, d_ag))
        print("L2 distance between source and guide `%s`: %.4f" %
              (layer, d_sg))
        print("d_ag/d_sg*100 `%s`: %.4f" % (layer, d_ag*100/d_sg))
        # assertLess reports both operands on failure.
        self.assertLess(d_ag*100/d_sg, 50.)