Beispiel #1
0
def generate_jsma_examples(sess, model, x, y, X, Y, attack_params, verbose,
                           attack_log_fpath):
    """
    Targeted attack, with target classes in Y.
    """
    Y_target = Y

    nb_classes = Y.shape[1]

    jsma = SaliencyMapMethod(KerasModelWrapper(model), back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.}

    adv_x_list = []

    with click.progressbar(range(0, len(X)),
                           file=sys.stderr,
                           show_pos=True,
                           width=40,
                           bar_template='  [%(bar)s] JSMA Attacking %(info)s',
                           fill_char='>',
                           empty_char='-') as bar:
        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in bar:
            sample = X[sample_ind:(sample_ind + 1)]

            adv_x = jsma.generate_np(sample, **jsma_params)
            adv_x_list.append(adv_x)

    return np.vstack(adv_x_list)
class TestSaliencyMapMethod(CleverHansTest):
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = DummyModel()
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(10, 1000)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 10))
        feed_labs[np.arange(10), np.random.randint(0, 9, 10)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        clip_min=-5.,
                                        clip_max=5.,
                                        y_target=feed_labs)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        worked = np.mean(np.argmax(feed_labs, axis=1) == new_labs)
        self.assertTrue(worked > .9)
def saliency_map_method(model,
                        X,
                        Y,
                        theta,
                        gamma,
                        clip_min=None,
                        clip_max=None):
    nb_classes = Y.shape[1]
    X_adv = np.zeros_like(X)
    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': theta,
        'gamma': gamma,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'y_target': None
    }
    for i in tqdm(range(len(X))):
        # Get the sample
        sample = X[i:(i + 1)]
        # First, record the current class of the sample
        current_class = int(np.argmax(Y[i]))
        # Randomly choose a target class
        target_class = np.random.choice(
            other_classes(nb_classes, current_class))
        # This call runs the Jacobian-based saliency map approach
        one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
        one_hot_target[0, target_class] = 1
        jsma_params['y_target'] = one_hot_target
        X_adv[i] = jsma.generate_np(sample, **jsma_params)

    return X_adv
Beispiel #4
0
class TestSaliencyMapMethod(CleverHansTest):
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()
        import tensorflow as tf
        import tensorflow.contrib.slim as slim

        def dummy_model(x):
            net = slim.fully_connected(x, 60)
            return slim.fully_connected(net, 10, activation_fn=None)

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = tf.make_template('dummy_model', dummy_model)
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(10, 1000)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 10))
        feed_labs[np.arange(10), np.random.randint(0, 9, 10)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        clip_min=-5., clip_max=5.,
                                        y_target=feed_labs)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        worked = np.mean(np.argmax(feed_labs, axis=1) == new_labs)
        self.assertTrue(worked > .9)
Beispiel #5
0
class TestSaliencyMapMethod(CleverHansTest):
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = DummyModel()
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(10, 1000)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 10))
        feed_labs[np.arange(10), np.random.randint(0, 9, 10)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        clip_min=-5., clip_max=5.,
                                        y_target=feed_labs)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        worked = np.mean(np.argmax(feed_labs, axis=1) == new_labs)
        self.assertTrue(worked > .9)
def attack(model, x_input, input_img):
    wrap = KerasModelWrapper(model)
    jsma = SaliencyMapMethod(wrap, sess=sess)
    jsma_params = {
        'theta': 0.1,
        'gamma': 0.1,
        'clip_min': -1.,
        'clip_max': 1.,
        'y_target': None
    }
    adv_x = jsma.generate_np(input_img, **jsma_params)
    return adv_x
def mnist_jsma_attack(sample, target, model, sess):
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 3,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    jsma_params['y_target'] = target

    adv = jsma.generate_np(sample, **jsma_params)
    return adv
Beispiel #8
0
def jsma_craft(surrogate_model, source_samples, target_info):
    import tensorflow as tf
    import numpy as np
    from dataset import Data
    from cleverhans.attacks import SaliencyMapMethod

    with tf.variable_scope(surrogate_model.scope):
        sess = tf.get_default_session()

        jsma = SaliencyMapMethod(surrogate_model.tf(), back='tf', sess=sess)

        one_hot_target = np.zeros((1, 10), dtype=np.float32)
        one_hot_target[0, target_info['target_class']] = 1
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': one_hot_target
        }

        # Loop over the samples we want to perturb into adversarial examples

        adv_xs = []
        for sample_ind in range(len(source_samples)):
            sample = source_samples.getx(sample_ind)

            adv_xs.append(jsma.generate_np(sample, **jsma_params))

            # TODO remove break
            if sample_ind == 2:
                break

        adv_ys = np.concatenate([one_hot_target] * len(adv_xs))

        adv_xs_d = Data(np.concatenate(adv_xs), adv_ys)

        return source_samples, adv_xs_d
Beispiel #9
0
def generate(sess, model, data_feeder, source, target, adv_dump_dir, nb_samples, perturbation_step, max_perturbation):
    wrap = KerasModelWrapper(model.model)
    jsma = SaliencyMapMethod(wrap, sess=sess)
    jsma_params = {
        'gamma': max_perturbation,
        'theta': perturbation_step,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'y_target': data_feeder.get_labels(target, nb_samples)
    }

    craft_data = data_feeder.get_evasion_craft_data(source_class=source, total_count=nb_samples)
    adv_data = jsma.generate_np(craft_data, **jsma_params)

    # Commit data
    adv_writer = AdversarialWriterEvasion(source_class=source,
                                          target_class=target,
                                          attack_params={
                                              'step': perturbation_step,
                                              'max_perturbation':max_perturbation},
                                          adversarial_data_path=adv_dump_dir)
    adv_writer.batch_put(craft_data, adv_data)
    adv_writer.commit()
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params,
          rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
        def do_jsma():
            print('Crafting ' + str(source_samples) + ' * ' +
                  str(nb_classes - 1) + ' adversarial examples')

            # Keep track of success (adversarial example classified in target)
            results = np.zeros((nb_classes, source_samples), dtype='i')

            # Rate of perturbed features for each test set example and target class
            perturbations = np.zeros((nb_classes, source_samples), dtype='f')

            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            # Instantiate a SaliencyMapMethod attack object
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.1,
                'clip_min': 0.,
                'clip_max': 1.,
                'y_target': None
            }

            figure = None
            # Loop over the samples we want to perturb into adversarial examples
            for sample_ind in xrange(0, source_samples):
                print('--------------------------------------')
                print('Attacking input %i/%i' %
                      (sample_ind + 1, source_samples))
                sample = X_test[sample_ind:(sample_ind + 1)]

                # We want to find an adversarial example for each possible target class
                # (i.e. all classes that differ from the label given in the dataset)
                current_class = int(np.argmax(Y_test[sample_ind]))
                target_classes = other_classes(nb_classes, current_class)

                # For the grid visualization, keep original images along the diagonal
                grid_viz_data[current_class,
                              current_class, :, :, :] = np.reshape(
                                  sample, (img_rows, img_cols, channels))

                # Loop over all target classes
                for target in target_classes:
                    print('Generating adv. example for target class %i' %
                          target)

                    # This call runs the Jacobian-based saliency map approach
                    one_hot_target = np.zeros((1, nb_classes),
                                              dtype=np.float32)
                    one_hot_target[0, target] = 1
                    jsma_params['y_target'] = one_hot_target
                    adv_x = jsma.generate_np(sample, **jsma_params)

                    # Check if success was achieved
                    res = int(model_argmax(sess, x, preds, adv_x) == target)

                    # Computer number of modified features
                    adv_x_reshape = adv_x.reshape(-1)
                    test_in_reshape = X_test[sample_ind].reshape(-1)
                    nb_changed = np.where(
                        adv_x_reshape != test_in_reshape)[0].shape[0]
                    percent_perturb = float(nb_changed) / adv_x.reshape(
                        -1).shape[0]

                    # Display the original and adversarial images side-by-side
                    if FLAGS.viz_enabled:
                        figure = pair_visual(
                            np.reshape(sample, (img_rows, img_cols)),
                            np.reshape(adv_x, (img_rows, img_cols)), figure)

                    # Add our adversarial example to our grid data
                    grid_viz_data[target, current_class, :, :, :] = np.reshape(
                        adv_x, (img_rows, img_cols, channels))

                    # Update the arrays for later analysis
                    results[target, sample_ind] = res
                    perturbations[target, sample_ind] = percent_perturb

            print('--------------------------------------')

            # Compute the number of adversarial examples that were successfully found
            nb_targets_tried = ((nb_classes - 1) * source_samples)
            succ_rate = float(np.sum(results)) / nb_targets_tried
            print('Avg. rate of successful adv. examples {0:.4f}'.format(
                succ_rate))
            report.clean_train_adv_eval = 1. - succ_rate

            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(perturbations)
            print('Avg. rate of perturbed features {0:.4f}'.format(
                percent_perturbed))

            # Compute the average distortion introduced for successful samples only
            percent_perturb_succ = np.mean(perturbations * (results == 1))
            print('Avg. rate of perturbed features for successful '
                  'adversarial examples {0:.4f}'.format(percent_perturb_succ))
            if FLAGS.viz_enabled:
                import matplotlib.pyplot as plt
                plt.close(figure)
                _ = grid_visual(grid_viz_data)

            return report
def main(argv=None):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :return:
    """
    # Disable Keras learning phase since we will be serving through tensorflow
    keras.layers.core.K.set_learning_phase(0)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
              "to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model if it does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        saver.restore(sess, os.path.join(FLAGS.train_dir, FLAGS.filename))
    else:
        train_params = {
            'nb_epochs': FLAGS.nb_epochs,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, preds, X_train, Y_train,
                    args=train_params)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                          args=eval_params)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
          str(FLAGS.nb_classes-1) + ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (FLAGS.nb_classes,
                  FLAGS.nb_classes,
                  FLAGS.img_rows,
                  FLAGS.img_cols,
                  FLAGS.nb_channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Define the SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, FLAGS.source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, FLAGS.source_samples))

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(FLAGS.nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            X_test[sample_ind:(sample_ind+1)],
            (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, FLAGS.nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params = {'theta': 1., 'gamma': 0.1,
                           'nb_classes': FLAGS.nb_classes, 'clip_min': 0.,
                           'clip_max': 1., 'targets': y,
                           'y_val': one_hot_target}
            adv_x = jsma.generate_np(X_test[sample_ind:(sample_ind+1)],
                                     **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if FLAGS.viz_enabled:
                if 'figure' not in vars():
                    figure = pair_visual(
                        np.reshape(X_test[sample_ind:(sample_ind+1)],
                                   (FLAGS.img_rows, FLAGS.img_cols)),
                        np.reshape(adv_x,
                                   (FLAGS.img_rows, FLAGS.img_cols)))
                else:
                    figure = pair_visual(
                        np.reshape(X_test[sample_ind:(sample_ind+1)],
                                   (FLAGS.img_rows, FLAGS.img_cols)),
                        np.reshape(adv_x, (FLAGS.img_rows,
                                   FLAGS.img_cols)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if FLAGS.viz_enabled:
        _ = grid_visual(grid_viz_data)
Beispiel #13
0
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    sess = tf.Session()

    set_log_level(logging.DEBUG)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    # Label smoothing
    assert Y_train.shape[1] == 10.

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = FLAGS.model_path
    nb_samples = FLAGS.nb_samples

    from cnn_models import make_basic_cnn
    model = make_basic_cnn('fp_',
                           input_shape=(None, img_rows, img_cols, channels),
                           nb_filters=FLAGS.nb_filters)

    preds = model(x)
    print("Defined TensorFlow model graph with %d parameters" % model.n_params)

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate(eval_params):
        # Evaluate the model on legitimate test examples
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        return acc

    model_load(sess, model_path)
    print('Restored model from %s' % model_path)
    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = evaluate(eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, nb_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, nb_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    from cleverhans.attacks import SaliencyMapMethod
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'gamma': FLAGS.gamma,
        'theta': 1.,
        'symbolic_impl': True,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in range(0, nb_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, nb_samples))
        sample = X_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if FLAGS.viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, channels)),
                    np.reshape(adv_x, (img_rows, img_cols, channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * nb_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if FLAGS.viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)
Beispiel #14
0
def mnist_tutorial_jsma(train_start=0,
                        train_end=60000,
                        test_start=0,
                        test_end=10000,
                        viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
    """
  MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    #replace
    num_threads = None
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    #with sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64

    # Define TF model graph
    model = make_basic_picklable_cnn()

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_train, [60000, 28, 28]), y_train))
    dataset = dataset.batch(32)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_test, [10000, 28, 28]), y_test))
    val_dataset = val_dataset.batch(32)

    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    if TRAIN_NEW == 1:
        with sess.as_default():
            train(sess, loss, x_train, y_train, args=train_params, rng=rng)
            save("test.joblib", model)
    else:
        with sess.as_default():
            model = load("test.joblib")  #changed
        assert len(model.get_params()) > 0
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    seed(SEED)
    for sample_ind in xrange(0, source_samples):
        img = randint(0, 10000)
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[img:(img +
                             1)]  #sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(
            y_test[img]))  #current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))
        tn = 0
        totc = 0
        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]
            diff = np.array(adv_x - sample)
            #print(np.sum(diff))
            diff = np.reshape(diff, (28, 28))
            diff = diff * 255
            cv2.imwrite("test.png", diff)
            diff = cv2.imread("test.png")
            diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
            nieghbors = 0
            tc = 0
            for i in range(0, 28, 1):
                for j in range(0, 28, 1):
                    if diff[i, j] > 0:
                        tc = tc + 1
                        totc = totc + 1
                        if i > 0 and i < 27 and j > 0 and j < 27:  #main grid not edges or corners
                            if diff[i - 1, j - 1] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i - 1, j] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i - 1, j + 1] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i, j - 1] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i, j + 1] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i + 1, j - 1] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i + 1, j] > 0:
                                nieghbors = nieghbors + 1
                            if diff[i + 1, j + 1] > 0:
                                nieghbors = nieghbors + 1
                        else:
                            #corners
                            if i == 0 and j == 0:
                                if diff[i, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j] > 0:
                                    nieghbors = nieghbors + 1
                            if i == 27 and j == 0:
                                if diff[i, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j] > 0:
                                    nieghbors = nieghbors + 1
                            if i == 0 and j == 27:
                                if diff[i, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j] > 0:
                                    nieghbors = nieghbors + 1
                            if i == 27 and j == 27:
                                if diff[i, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j] > 0:
                                    nieghbors = nieghbors + 1
                            #edges
                            if i == 0 and j > 0 and j < 27:  #left side
                                if diff[i, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                            if i == 27 and j > 0 and j < 27:  #right side
                                if diff[i, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                            if j == 0 and i > 0 and i < 27:  #top side
                                if diff[i - 1, j] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j + 1] > 0:
                                    nieghbors = nieghbors + 1
                            if j == 27 and i > 0 and i < 27:  #bot side
                                if diff[i - 1, j] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i - 1, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i, j - 1] > 0:
                                    nieghbors = nieghbors + 1
                                if diff[i + 1, j - 1] > 0:
                                    nieghbors = nieghbors + 1

            # print(tc)
            # print(nieghbors)
            tn = tn + nieghbors
            # if tc > 0:
            # print(nieghbors/tc)
            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)
            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb
            #print(perturbations[target, sample_ind])

    print('--------------------------------------')

    print("average neighbors per modified pixel ", tn / totc)
    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.8f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)

    s = perturbations.shape
    myPert = np.empty(0)
    myResults = np.empty(0)
    for i in range(s[0]):
        for j in range(s[1]):
            if perturbations[i][j] > 0:
                myPert = np.append(myPert, perturbations[i][j])
                myResults = np.append(myResults, results[i][j])
    min_perturbed = np.min(myPert)
    max_perturbed = np.max(myPert)

    s2 = myResults.shape
    final = np.empty(0)
    for i in range(s2[0]):
        if myResults[i] > 0:
            final = np.append(final, myPert[i])

    print('Avg. rate of perturbed features {0:.8f}'.format(percent_perturbed))
    print('MIN of perturbed features {0:.8f}'.format(min_perturbed))
    print('MAX of perturbed features {0:.8f}'.format(max_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    min_perturb_succ = np.min(final)
    max_perturb_succ = np.max(final)
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(percent_perturb_succ))
    print('Min of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(min_perturb_succ))
    print('Max of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(max_perturb_succ))

    #Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
def mnist_attack(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 viz_enabled=True,
                 nb_epochs=6,
                 batch_size=128,
                 nb_filters=64,
                 nb_samples=10,
                 learning_rate=0.001,
                 eps=0.3,
                 attack=0,
                 attack_iterations=100,
                 model_path=None,
                 targeted=False,
                 binary=False,
                 scale=False,
                 rand=False,
                 debug=None,
                 test=False,
                 data_dir=None,
                 delay=0,
                 adv=0,
                 nb_iter=40):
    """
    MNIST tutorial for generic attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1237)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    if debug:
        set_log_level(logging.DEBUG)
    else:
        set_log_level(logging.WARNING)  # for running on sharcnet

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(datadir=data_dir,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name='phase')

    # for attempting to break unscaled network.
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    save = False
    train_from_scratch = False
    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    # Define TF model graph
    if binary:
        print('binary=True')
        if scale:
            print('scale=True')
            if rand:
                print('rand=True')
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
                model = make_scaled_binary_rand_cnn(
                    phase,
                    logits_scalar,
                    'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
            else:
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
                model = make_scaled_binary_cnn(phase,
                                               logits_scalar,
                                               'binsc_',
                                               input_shape=(None, img_rows,
                                                            img_cols,
                                                            channels),
                                               nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bin_',
                                          nb_filters=nb_filters)
    else:
        if rand:
            print('rand=True')
            from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn
            model = make_scaled_rand_cnn(phase,
                                         logits_scalar,
                                         'fp_rand',
                                         nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_cnn
            model = make_basic_cnn(phase,
                                   logits_scalar,
                                   'fp_',
                                   nb_filters=nb_filters)

    preds = model(x, reuse=False)  # * logits_scalar
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    rng = np.random.RandomState([2017, 8, 30])

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    def evaluate():
        # Evaluate the accuracy of the MNIST model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

        if adv != 0:
            # Accuracy of the adversarially trained model on adversarial
            # examples
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % acc)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params,
                             feed={logits_scalar: ATTACK_T})
            print('Test accuracy on adversarial examples (scaled): %0.4f' %
                  acc)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if test:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
        else:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            if test:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            evaluate=evaluate,
                            args=train_params,
                            save=save,
                            rng=rng)
            else:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            args=train_params,
                            save=save,
                            rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          preds,
                          X_test,
                          Y_test,
                          phase=phase,
                          feed={phase: False},
                          args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Build dataset
    ###########################################################################
    if viz_enabled:
        assert nb_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        viz_rows = nb_classes if targeted else 2
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        if viz_enabled:
            from cleverhans.utils import grid_visual
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, idxs, nb_classes, img_rows, img_cols, channels)
        else:
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
                img_cols, channels)
    else:
        if viz_enabled:
            from cleverhans.utils import pair_visual
            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        print('Attack: CarliniWagnerL2')
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        print('Attack: SaliencyMapMethod')
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        print('Attack: FastGradientMethod')
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        print('Attack: MadryEtAl')
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    adv_np = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    name = 'm_fgsm_eps%s_n%s.npy' % (eps, nb_samples)
    fpath = os.path.join(
        '/scratch/gallowaa/mnist/adversarial_examples/cleverhans/', name)
    np.savez(fpath, x=adv_np, y=Y_test[:nb_samples])
    '''
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    adv_np, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                         phase: False}, args=eval_params)
    '''
    eval_params = {'batch_size': att_batch_size}
    if targeted:
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_np,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)

    else:
        print("Evaluating untargeted results")
        if viz_enabled:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[idxs],
                                      phase=phase,
                                      args=eval_params)
        else:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[:nb_samples],
                                      phase=phase,
                                      args=eval_params)

    if viz_enabled:
        n = nb_classes - 1
        for i in range(nb_classes):
            if targeted:
                for j in range(nb_classes):
                    if i != j:
                        if j != 0 and i != n:
                            grid_viz_data[i, j] = adv_np[j * n + i]
                        if j == 0 and i > 0 or i == n and j > 0:
                            grid_viz_data[i, j] = adv_np[j * n + i - 1]
                    else:
                        grid_viz_data[i, j] = adv_inputs[j * n]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv_np[j]
        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Compute number of modified features (L_0 norm)
    nb_changed = np.where(adv_np != adv_inputs)[0].shape[0]
    percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0])

    # Compute the average distortion introduced by the algorithm
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturb))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f}'.format(accuracy))
    print('{0:.4f}'.format(adv_accuracy))
    print('{0:.4f}'.format(percent_perturbed))
    print('{0:.4f}'.format(percent_perturb))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Beispiel #16
0
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter

    #### EMPIR extra flags
    lowprecision = FLAGS.lowprecision
    abits = FLAGS.abits
    wbits = FLAGS.wbits
    abitsList = FLAGS.abitsList
    wbitsList = FLAGS.wbitsList
    stocRound = True if FLAGS.stocRound else False
    rand = FLAGS.rand
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree = True if FLAGS.ensembleThree else False
    abits2 = FLAGS.abits2
    wbits2 = FLAGS.wbits2
    abits2List = FLAGS.abits2List
    wbits2List = FLAGS.wbits2List
    inpgradreg = True if FLAGS.inpgradreg else False
    distill = True if FLAGS.distill else False
    student_epochs = FLAGS.student_epochs
    l2dbl = FLAGS.l2dbl
    l2cs = FLAGS.l2cs
    ####

    save = False
    train_from_scratch = False

    if ensembleThree:
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, batch_size,
                                                   nb_filters, learning_rate,
                                                   nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if ensembleThree:
        if (wbitsList is None) or (
                abitsList is None
        ):  # Layer wise separate quantization not specified for first model
            if (wbits == 0) or (abits == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers for the first model have to specified using wbits1 and abits1 flags"
                )
                sys.exit(1)
            else:
                fixedPrec1 = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers of the first model"
            )
            sys.exit(1)
        else:
            fixedPrec1 = 0

        if (wbits2List is None) or (
                abits2List is None
        ):  # Layer wise separate quantization not specified for second model
            if (wbits2 == 0) or (abits2 == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers for the second model have to specified using wbits1 and abits1 flags"
                )
                sys.exit(1)
            else:
                fixedPrec2 = 1
        elif (len(wbits2List) != 3) or (len(abits2List) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers of the second model"
            )
            sys.exit(1)
        else:
            fixedPrec2 = 0

        if (fixedPrec2 != 1) or (
                fixedPrec1 != 1
        ):  # Atleast one of the models have separate precisions per layer
            fixedPrec = 0
            print("Within atleast one model has separate precisions")
            if (fixedPrec1 == 1):  # first layer has fixed precision
                abitsList = (abits, abits, abits)
                wbitsList = (wbits, wbits, wbits)
            if (fixedPrec2 == 1):  # second layer has fixed precision
                abits2List = (abits2, abits2, abits2)
                wbits2List = (wbits2, wbits2, wbits2)
        else:
            fixedPrec = 1

        if (train_from_scratch):
            print("The ensemble model cannot be trained from scratch")
            sys.exit(1)
        if fixedPrec == 1:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn
            model = make_ensemble_three_cifar_cnn(phase,
                                                  logits_scalar,
                                                  'lp1_',
                                                  'lp2_',
                                                  'fp_',
                                                  wbits,
                                                  abits,
                                                  wbits2,
                                                  abits2,
                                                  input_shape=(None, img_rows,
                                                               img_cols,
                                                               channels),
                                                  nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn_layerwise
            model = make_ensemble_three_cifar_cnn_layerwise(
                phase,
                logits_scalar,
                'lp1_',
                'lp2_',
                'fp_',
                wbitsList,
                abitsList,
                wbits2List,
                abits2List,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
    elif lowprecision:
        if (wbitsList is None) or (
                abitsList is
                None):  # Layer wise separate quantization not specified
            if (wbits == 0) or (abits == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers have to specified using wbits and abits flags"
                )
                sys.exit(1)
            else:
                fixedPrec = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers"
            )
            sys.exit(1)
        else:
            fixedPrec = 0

        if fixedPrec:
            from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_cifar_cnn
            model = make_basic_lowprecision_cifar_cnn(
                phase,
                logits_scalar,
                'lp_',
                wbits,
                abits,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters,
                stocRound=stocRound)
        else:
            from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_cifar_cnn
            model = make_layerwise_lowprecision_cifar_cnn(
                phase,
                logits_scalar,
                'lp_',
                wbitsList,
                abitsList,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters,
                stocRound=stocRound)
    elif distill:
        from cleverhans_tutorials.tutorial_models import make_distilled_cifar_cnn
        model = make_distilled_cifar_cnn(phase,
                                         logits_scalar,
                                         'teacher_fp_',
                                         'fp_',
                                         nb_filters=nb_filters,
                                         input_shape=(None, img_rows, img_cols,
                                                      channels))
    ####
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cifar_cnn
        model = make_basic_cifar_cnn(phase,
                                     logits_scalar,
                                     'fp_',
                                     input_shape=(None, img_rows, img_cols,
                                                  channels),
                                     nb_filters=nb_filters)

    # separate predictions of teacher for distilled training
    if distill:
        teacher_preds = model.teacher_call(x, reuse=False)
        teacher_logits = model.get_teacher_logits(x, reuse=False)

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:
        ##default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble(sess,
                                      x,
                                      y,
                                      preds,
                                      X_test,
                                      Y_test,
                                      phase=phase,
                                      args=eval_params)
        else:
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if distill:
            temperature = 10  # 1 means the teacher predictions are used as it is
            teacher_scaled_preds_val = model_train_teacher(sess,
                                                           x,
                                                           y,
                                                           teacher_preds,
                                                           teacher_logits,
                                                           temperature,
                                                           X_train,
                                                           Y_train,
                                                           phase=phase,
                                                           args=train_params,
                                                           rng=rng)
            eval_params = {'batch_size': batch_size}
            teacher_acc = model_eval(sess,
                                     x,
                                     y,
                                     teacher_preds,
                                     X_test,
                                     Y_test,
                                     phase=phase,
                                     args=eval_params)
            print(
                'Test accuracy of the teacher model on legitimate examples: %0.4f'
                % teacher_acc)
            print('Training the student model...')
            student_train_params = {
                'nb_epochs': student_epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'loss_name': 'train loss',
                'filename': 'model',
                'reuse_global_step': False,
                'train_scope': 'train',
                'is_training': True
            }
            if save:
                student_train_params.update({'log_dir': model_path})
            y_teacher = tf.placeholder(tf.float32, shape=(None, nb_classes))
            model_train_student(sess,
                                x,
                                y,
                                preds,
                                temperature,
                                X_train,
                                Y_train,
                                y_teacher=y_teacher,
                                teacher_preds=teacher_scaled_preds_val,
                                alpha=0.3,
                                beta=0.7,
                                phase=phase,
                                evaluate=evaluate,
                                args=student_train_params,
                                save=save,
                                rng=rng)
        elif inpgradreg:
            model_train_inpgrad_reg(sess,
                                    x,
                                    y,
                                    preds,
                                    X_train,
                                    Y_train,
                                    phase=phase,
                                    evaluate=evaluate,
                                    l2dbl=l2dbl,
                                    l2cs=l2cs,
                                    args=train_params,
                                    save=save,
                                    rng=rng)
        else:
            # do clean training for 'nb_epochs' or 'delay' epochs
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        predictions_adv=preds_adv_train,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)

    else:
        if ensembleThree:
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            stored_variables = [
                'lp_conv1_init/k', 'lp_conv2_init/k', 'lp_conv3_init/k',
                'lp_ip1init/W', 'lp_logits_init/W'
            ]
            variable_dict = dict(zip(stored_variables, variables[:5]))
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            variable_dict = dict(zip(stored_variables, variables[5:10]))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            stored_variables = [
                'fp_conv1_init/k', 'fp_conv2_init/k', 'fp_conv3_init/k',
                'fp_ip1init/W', 'fp_logits_init/W'
            ]
            variable_dict = dict(zip(stored_variables, variables[10:]))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
        else:
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble(sess,
                                       x,
                                       y,
                                       preds,
                                       X_test,
                                       Y_test,
                                       phase=phase,
                                       feed={phase: False},
                                       args=eval_params)
    else:
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds,
                              X_test,
                              Y_test,
                              phase=phase,
                              feed={phase: False},
                              args=eval_params)

    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
            img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model,
                                   back='tf',
                                   model_type=model_type,
                                   num_classes=nb_classes,
                                   sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model,
                                     back='tf',
                                     model_type=model_type,
                                     sess=sess,
                                     num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model,
                                      back='tf',
                                      model_type=model_type,
                                      sess=sess,
                                      num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model,
                             back='tf',
                             model_type=model_type,
                             sess=sess,
                             num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model,
                                        back='tf',
                                        sess=sess,
                                        model_type=model_type,
                                        num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': att_batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  X_test_adv,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)
    else:
        # assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating un-targeted results")
        if ensembleThree:
            adv_accuracy = model_eval_ensemble(sess,
                                               x,
                                               y,
                                               preds,
                                               X_test_adv,
                                               Y_test,
                                               phase=phase,
                                               args=eval_params)
        else:  #default below
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      X_test_adv,
                                      Y_test,
                                      phase=phase,
                                      args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((X_test_adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()
    '''
    print("Repeating the process, using adversarial training")

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              phase=phase,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, preds_adv, X_test,
                                  Y_test, phase=phase, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    train_params.update({'reuse_global_step': True})
    model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                predictions_adv=preds_adv, evaluate=evaluate_2,
                args=train_params)
    '''
    '''
Beispiel #17
0
def effective_train_jsma(train_start=0,
                         train_end=20,
                         test_start=0,
                         test_end=10000,
                         viz_enabled=False,
                         nb_epochs=6,
                         batch_size=128,
                         nb_classes=10,
                         source_samples=10,
                         learning_rate=0.001):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    model_path = "./"
    model_name = "adv_trained_jsma_model_alpha0.4_fortest"

    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    # Define input TF placeholder
    x1 = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))  # for clean data
    x2 = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))  # for adv data
    y = tf.placeholder(tf.float32, shape=(None, 10))  # for adv clean targets

    # Initialize the model
    model = make_basic_cnn()
    preds = model(x1)
    preds_adv = model(x2)

    # Instantiate a SaliencyMapMethod attack object
    # jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    # Define loss
    loss = 0.4 * model_loss(y, preds) + 0.6 * model_loss(y, preds_adv)

    train_step = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_step = train_step.minimize(loss)

    def evaluate_2(adv_examples_last_batch, adv_clean_labels_last_batch):
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x1,
                              y,
                              preds,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x2,
                              y,
                              preds_adv,
                              adv_examples_last_batch,
                              adv_clean_labels_last_batch,
                              args=eval_params)
        print('Test accuracy on last batch of adversarial examples: %0.4f' %
              accuracy)
        report.adv_train_adv_eval = accuracy

    with sess.as_default():
        tf.global_variables_initializer().run()

        for epoch in xrange(nb_epochs):
            print('Training for epoch %i/%i' % (epoch, nb_epochs - 1))

            # Compute number of batches
            nb_batches = int(math.ceil(float(len(X_train)) / batch_size))
            assert nb_batches * batch_size >= len(X_train)

            # Indices to shuffle training set
            index_shuf = list(range(len(X_train)))
            rng.shuffle(index_shuf)

            prev = time.time()
            for batch in range(nb_batches):
                # re-instantiate Saliency object with new trained model
                jsma = SaliencyMapMethod(model, back='tf', sess=sess)
                print('--------------------------------------')
                # create an array for storing adv examples
                print('batch: %i/%i' % (batch + 1, nb_batches))
                # adv_examples = np.empty([1,28,28,1])
                adv_examples = []
                # for target labels
                #adv_targets = np.empty([1,10])
                # corresponding clean/correct label
                # adv_clean_labels = np.empty([1,10])
                adv_clean_labels = []
                # correspongding clean data
                # adv_clean_examples = np.empty([1,28,28,1])
                adv_clean_examples = []

                for sample_ind in xrange(0, batch_size):

                    print('Attacking input %i/%i' %
                          (sample_ind + 1, batch_size))
                    # Compute batch start and end indices
                    start, end = batch_indices(batch, len(X_train), batch_size)
                    X_this_batch = X_train[index_shuf[start:end]]
                    Y_this_batch = Y_train[index_shuf[start:end]]
                    # Perform one training step
                    # feed_dict = {x: X_train[index_shuf[start:end]],y: Y_train[index_shuf[start:end]]}

                    sample = X_this_batch[sample_ind:(
                        sample_ind + 1)]  # generate from training data

                    # We want to find an adversarial example for each possible target class
                    # (i.e. all classes that differ from the label given in the dataset)
                    current_class = int(np.argmax(Y_this_batch[sample_ind])
                                        )  # generate from training data
                    target_classes = other_classes(nb_classes, current_class)
                    print('Current class is ', current_class)

                    # For the grid visualization, keep original images along the diagonal
                    # grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
                    #     sample, (img_rows, img_cols, channels))

                    # Loop over all target classes
                    for target in target_classes:
                        print('Generating adv. example for target class %i' %
                              target)

                        # This call runs the Jacobian-based saliency map approach
                        one_hot_target = np.zeros((1, nb_classes),
                                                  dtype=np.float32)
                        #create fake target
                        one_hot_target[0, target] = 1
                        jsma_params['y_target'] = one_hot_target
                        adv_x = jsma.generate_np(
                            sample, **jsma_params
                        )  # get numpy array (1, 28, 28, 1), not Tensor

                        # Check if success was achieved
                        # res = int(model_argmax(sess, x, preds, adv_x) == target)
                        # if succeeds
                        # if res == 1:
                        # append new adv_x to adv_examples array
                        # append sample here, so that the number of times sample is appended mmatches number of adv_ex.
                        # adv_examples = np.append(adv_examples, adv_x, axis=0)
                        adv_examples.append(adv_x)
                        #adv_targets = np.append(adv_targets, one_hot_target, axis=0)
                        # adv_clean_labels = np.append(adv_clean_labels, np.expand_dims(Y_this_batch[sample_ind],axis=0), axis=0) # generate from training data
                        adv_clean_labels.append(Y_this_batch[sample_ind])
                        # adv_clean_examples = np.append(adv_clean_examples, sample, axis=0)
                        adv_clean_examples.append(sample)

                # what we have for this batch, batch_size * 9 data
                # adv_examples = adv_examples[1:,:,:,:]
                #adv_targets = adv_targets[1:,:]
                # adv_clean_labels = adv_clean_labels[1:,:]
                # adv_clean_examples = adv_clean_examples[1:,:,:,:]
                adv_examples = np.reshape(
                    adv_examples, (batch_size * (nb_classes - 1), 28, 28, 1))
                adv_clean_examples = np.reshape(adv_clean_examples,
                                                (batch_size *
                                                 (nb_classes - 1), 28, 28, 1))
                feed_dict = {
                    x1: adv_clean_examples,
                    x2: adv_examples,
                    y: adv_clean_labels
                }
                train_step.run(feed_dict=feed_dict)

            cur = time.time()
            _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) +
                         " seconds")

            evaluate_2(adv_examples, adv_clean_labels)
        print('Training finished.')

        # report on clean test data
        preds_test = model(x1)
        eval_par = {'batch_size': 10}
        acc_clean = model_eval(sess,
                               x1,
                               y,
                               preds_test,
                               X_test,
                               Y_test,
                               args=eval_par)
        print('Test accuracy on legitimate examples: %0.4f\n' % acc_clean)
        # reload fgsm successfully attacking adv test data
        # with np.load("adversarial_fgsm.npz") as data:
        #     adv_X_test, adv_clean_Y_test, adv_clean_X_test = data['adv_examples'], data['adv_clean_labels'], data['adv_clean_examples']
        # print('FGSM adversarial data are successfully reloaded.')
        # preds_adv_test = model(x1)
        # # Evaluate the accuracy of the MNIST model on adversarial examples
        # # eval_par = {'batch_size': 10}
        # acc = model_eval(sess, x1, y, preds_adv_test, adv_X_test, adv_clean_Y_test, args=eval_par)
        # print('Test accuracy on pre-generated adversarial examples of fgsm: %0.4f\n' % acc)
        # # reload fgsm successfully attacking adv test data
        # with np.load("adversarial_mnist_test_from_1500.npz") as data:
        #     adv_X_test, adv_clean_Y_test, adv_clean_X_test = data['adv_examples'], data['adv_clean_labels'], data['adv_clean_examples']
        # print('JSMA adversarial data are successfully reloaded.')
        # # Evaluate the accuracy of the MNIST model on adversarial examples
        # acc2 = model_eval(sess, x1, y, preds_adv_test, adv_X_test, adv_clean_Y_test, args=eval_par)
        # print('Test accuracy on pre-generated adversarial examples of jsma: %0.4f\n' % acc2)
        save_path = os.path.join(model_path, model_name)
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        _logger.info("Completed model training and saved at: " +
                     str(save_path))
        # Close TF session
        sess.close()
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    binary = True if FLAGS.binary else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations

    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, binary, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            model = make_scaled_binary_cnn(phase, 'bin_', input_shape=(
                None, img_rows, img_cols, channels), nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase, 'bin_', input_shape=(
                None, img_rows, img_cols, channels), nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(phase, 'fp_', input_shape=(
            None, img_rows, img_cols, channels), nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(
            sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an CIFAR10 model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv:
        from cleverhans.attacks import FastGradientMethod
        fgsm = FastGradientMethod(model, back='tf', sess=sess)
        fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
        adv_x_train = fgsm.generate(x, phase, **fgsm_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                    evaluate=evaluate, args=train_params, save=save, rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        predictions_adv=preds_adv, evaluate=evaluate, args=train_params,
                        save=save, rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase,
                          feed={phase: False}, args=eval_params)

    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows, img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(
            nb_samples * (nb_classes - 1), a_max=MAX_BATCH_SIZE, a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': att_batch_size,
                         'initial_const': 10,
                         }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': att_batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, true_labels,
                                  phase=phase, args=eval_params)
    else:
        assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating un-targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, Y_test,
                                  phase=phase, args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((X_test_adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()

    '''
    print("Repeating the process, using adversarial training")

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              phase=phase,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, preds_adv, X_test,
                                  Y_test, phase=phase, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    train_params.update({'reuse_global_step': True})
    model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                predictions_adv=preds_adv, evaluate=evaluate_2,
                args=train_params)
    '''
    '''
Beispiel #19
0
print("The Test accuracy is: {}".format(acc))

################################################# Adversarial Attack  #####################################################
#JSMA
wrap = KerasModelWrapper(keras_model)
jsma = SaliencyMapMethod(wrap, back='tf', sess=sess)
jsma_params = {
    'theta': 0.4,
    'gamma': 0.4,
    'clip_min': 0.,
    'clip_max': 1.,
    'y_target': None
}

adv_x = jsma.generate_np(X_test, **jsma_params)
adv_conf = keras_model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_pred = np.reshape(adv_pred, (-1, 1))
adv_acc = np.mean(np.equal(adv_pred, Y_test.argmax(axis=1)))
print("The adversarial  accuracy is: {}".format(adv_acc))

################################################# De-Normalizing Adv samples ##############################################
adv_x_unscaled = scaler.inverse_transform(adv_x)
a = np.ceil(adv_x_unscaled)
a = adv_x_unscaled.astype(np.int32)
adv_conf_1 = keras_model.predict(a)
adv_pred_1 = np.argmax(adv_conf_1, axis=1)
adv_pred_1 = np.reshape(adv_pred_1, (-1, 1))
adv_acc_1 = np.mean(np.equal(adv_pred_1, Y_test.argmax(axis=1)))
print("The adversarial  accuracy is: {}".format(adv_acc_1))
def main():
    random_seed = 1024
    train_size, test_size = 55000, 10000
    batch_size = 100
    learning_rate = 0.05
    epochs = 5
    steps = epochs * 550
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    sess = tf.InteractiveSession(config=config)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    tf.set_random_seed(random_seed)

    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label

    noise = tf.placeholder(tf.float32, [None, 100])  # noise vector
    y_target = tf.placeholder(tf.float32, [None, 10])  # target label

    x_perturb = x + tf.random_normal(
        shape=tf.shape(x), mean=0.0, stddev=0.5, dtype=tf.float32)
    x_perturb = tf.clip_by_value(x_perturb, 0, 1)

    x1 = tf.placeholder(tf.float32, [None, 784])
    x2 = tf.placeholder(tf.float32, [None, 784])

    y_n = classifier_n(x)
    y = classifier(x)
    y_perturb = classifier(x_perturb)

    # gan
    x_gan = generator(x, noise)
    y_gan = classifier(x_gan)

    loss_cls = softmax_loss(label, y_n)  #+ softmax_loss(label, y_gan)
    loss_gan = -softmax_loss(label, y_gan)

    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    g_vars = [var for var in all_vars if 'generator' in var.name]
    train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss_cls, var_list = c_vars, global_step = global_step)
    train_op_generator = GradientDescentOptimizer(learning_rate = 0.05) \
        .minimize(loss_gan, var_list = g_vars, global_step = global_step)

    #fgsm
    x_fgsm = attack.fgsm(x, y, eps=0.2, clip_min=0, clip_max=1)
    y_fgsm = classifier(x_fgsm)
    # random fgsm
    x_fgsm_rd = attack.fgsm(x_perturb,
                            y_perturb,
                            eps=0.2,
                            clip_min=0,
                            clip_max=1)
    y_fgsm_rd = classifier(x_fgsm_rd)
    # jsma
    jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)

    # train
    saver = tf.train.Saver()
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    x_fgsm_mnist = np.load(os.path.join('data', 'x_fgsm_mnist.npy'))
    x_gan_mnist = np.load(os.path.join('data', 'x_gan_mnist.npy'))
    x_jsma_mnist_1 = np.load(os.path.join('data', 'x_jsma_mnist_1.npy'))

    y_target_batch = np.zeros((100, 10), dtype=np.float32)
    y_target_batch[:, 0] = 1.0
    y_target_test = np.zeros((10000, 10), dtype=np.float32)
    y_target_test[:, 0] = 1.0

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        acc = {}
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            noise_d = sample_Z(batch_size, 100)
            f_dict = {
                x: batch[0],
                label: batch[1],
                noise: noise_d,
                y_target: y_target_batch
            }
            sess.run(train_op_classifier, feed_dict=f_dict)
            #for j in range(1):
            #sess.run(train_op_generator, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
                acc['benign'] = sess.run(get_acc(x, label),
                                         feed_dict={
                                             x: mnist.test.images,
                                             label: mnist.test.labels
                                         })
                acc['pre fgsm'] = sess.run(get_acc(x, label),
                                           feed_dict={
                                               x: x_fgsm_mnist,
                                               label: mnist.test.labels
                                           })
                acc['pre gan'] = sess.run(get_acc(x, label),
                                          feed_dict={
                                              x: x_gan_mnist,
                                              label: mnist.test.labels
                                          })
                acc['pre jsma 1'] = sess.run(get_acc(x, label),
                                             feed_dict={
                                                 x: x_jsma_mnist_1,
                                                 label:
                                                 mnist.test.labels[0:100, ]
                                             })

                x_fgsm_d = sess.run(x_fgsm,
                                    feed_dict={
                                        x: mnist.test.images,
                                        label: mnist.test.labels
                                    })
                acc['fgsm'] = sess.run(get_acc(x, label),
                                       feed_dict={
                                           x: x_fgsm_d,
                                           label: mnist.test.labels
                                       })

                x_fgsm_rd_d = sess.run(x_fgsm_rd,
                                       feed_dict={
                                           x: mnist.test.images,
                                           label: mnist.test.labels
                                       })
                acc['fgsm_rd'] = sess.run(get_acc(x, label),
                                          feed_dict={
                                              x: x_fgsm_rd_d,
                                              label: mnist.test.labels
                                          })

                print(epoch, acc)
        '''
        print('train gan')
        for t in range(1, 550 * 10 + 1):
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1], noise: sample_Z(batch_size, 100), y_target: y_target_batch}
            sess.run(train_op_generator, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
                batch = mnist.test.next_batch(batch_size)
                f_dict = {x: batch[0], label: batch[1], noise: sample_Z(batch_size, 100), y_target: y_target_batch}
                x_gan_data = sess.run(x_gan, feed_dict=f_dict)
                acc_gan = sess.run(get_acc(x, label), feed_dict={x: x_gan_data, label: batch[1]})
                print(epoch, acc_gan)

        x_fgsm_d = sess.run(x_fgsm, feed_dict = {x: mnist.test.images, label: mnist.test.labels})
        acc['fgsm'] = sess.run(get_acc(x, label), feed_dict={x: x_fgsm_d, label: mnist.test.labels})

        x_gan_d = sess.run(x_gan, feed_dict={x: mnist.test.images ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})
        acc['gan'] = sess.run(get_acc(x, label), feed_dict={x: x_gan_d ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})
        '''
        jsma_params = {'theta': 1., 'gamma': 0.1,'nb_classes': 10, 'clip_min': 0.,'clip_max': 1., 'targets': y,\
            'y_val': y_target_batch}
        x_jsma_1_d = jsma.generate_np(mnist.test.images[0:100, ],
                                      **jsma_params)
        acc['jsma 1'] = sess.run(get_acc(x, label),
                                 feed_dict={
                                     x: x_jsma_1_d,
                                     label: mnist.test.labels[0:100, ]
                                 })

        print(acc['jsma 1'])
Beispiel #21
0
            np.reshape(
                PGD_X,
                (10, PGD_X.shape[0] // 10, img_rows, img_cols, nchannels)))

if 'JSMA_hans' in attacks:
    jsma = JSMA_hans(clever, sess=sess)
    jsma_params = {
        'theta': 2.,
        'gamma': MAX_ITER / (img_cols * img_rows * nchannels),
        'clip_min': -1.,
        'clip_max': 1.,
        'y_target': None
    }
    if not Targeted:
        start = time.time()
        JSMA_X = jsma.generate_np(X, **jsma_params)
        end = time.time()
        print('Time:%.1f' % (end - start))
    elif Targeted:
        JSMA_X = np.zeros([10, np.size(m), img_rows, img_cols, nchannels])
        start = time.time()
        for target in range(10):
            one_hot_target = np.zeros((1, 10))
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            JSMA_X[target] = jsma.generate_np(X, **jsma_params)
        JSMA_X = JSMA_X.swapaxes(0, 1).reshape(
            [10 * np.size(m), img_rows, img_cols, nchannels])
        end = time.time()
        print('Time:%.1f' % (end - start))
    succ_rate, measure = evaluate(model, X, JSMA_X, Targeted)
Beispiel #22
0
def simulate_jsma():
    # MNIST-specific dimensions
    # img_rows = 28
    # img_cols = 28
    # channels = 1

    # Get MNIST test data
    x_test, y_test = get_mnist_data()
                                
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = ModelBasicCNN('model1', 10, 64)
    preds = model.get_logits(x)
    print("Defined TensorFlow model graph.")

    ##################################
    #          Load Model            #
    ##################################
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        save_path = r'C:\Users\hhkim\Desktop\scc\cleverhans\cleverhans_tutorials\model_data\train_model'
        saver.restore(sess, save_path)

        sample_class = int(input('input sample class(0-9): '))
        target_class = int(input('input target class(0-9): '))

        if sample_class<0 or sample_class>9 or target_class<0 or target_class>9 :
            print('input is wrong')

        sample_idx = get_mnist_idx(y_test, sample_class)
        target_idx = get_mnist_idx(y_test, target_class)

        sample = x_test[sample_idx:sample_idx+1]

        # Instantiate a SaliencyMapMethod attack object
        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        jsma_params = {'theta': 1., 'gamma': 0.1,
                    'clip_min': 0., 'clip_max': 1.,
                    'y_target': None}

        jsma_params['y_target'] = y_test[target_idx:target_idx+1]
        adv_x = jsma.generate_np(sample, **jsma_params)

        print('sample class:', np.argmax(y_test[sample_idx]))
        print('target class:', np.argmax(y_test[target_idx]))

        # Get array of output
        # updated by hak hyun kim 
        feed_dict = {x: adv_x}        
        probabilities = sess.run(preds, feed_dict)

        print('==========================================')
        def softmax(x):
            e_x = np.exp(x - np.max(x))
            return e_x / e_x.sum()
        print(softmax(probabilities))
        #print(probabilities)
        print('==========================================')

        print('{} class is recognized by {} '.format(sample_class, target_class))


    # save the adverisal image #
    two_d_img = (np.reshape(adv_x, (28, 28)) * 255).astype(np.uint8)
    from PIL import Image
    save_image = Image.fromarray(two_d_img)
    save_image = save_image.convert('RGB')
    save_image.save("res.png")

    sess.close()
Beispiel #23
0
	# We want to find an adversarial example for each possible target class
	# (i.e. all classes that differ from the label given in the dataset)
	current_class = int(np.argmax(y_test[sample_ind]))

	# Only target the normal class
	for target in [0]:
		if current_class == 0:
			break

		print('Generating adv. example for target class {} for sample {}'.format(target, sample_ind), end='\r')

		# Run the Jacobian-based saliency map approach
		one_hot_target = np.zeros((1, FLAGS.nb_classes), dtype=np.float32)
		one_hot_target[0, target] = 1
		jsma_params['y_target'] = one_hot_target
		adv_x = jsma.generate_np(sample, **jsma_params)

		# Check if success was achieved
		res = int(model_argmax(sess, x, predictions, adv_x) == target)

		# Compute number of modified features
		adv_x_reshape = adv_x.reshape(-1)
		test_in_reshape = X_test_scaled[sample_ind].reshape(-1)
		nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
		percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

		X_adv[sample_ind] = adv_x
		results[target, sample_ind] = res
		perturbations[target, sample_ind] = percent_perturb

print()
Beispiel #24
0
def cifar10_tutorial_jsma(train_start=0,
                          train_end=60000,
                          test_start=0,
                          test_end=10000,
                          viz_enabled=VIZ_ENABLED,
                          nb_epochs=NB_EPOCHS,
                          batch_size=BATCH_SIZE,
                          source_samples=SOURCE_SAMPLES,
                          learning_rate=LEARNING_RATE,
                          model_path=MODEL_PATH,
                          noise_output=NOISE_OUTPUT):
    """
  CIFAR10 tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get CIFAR10 test data
    cifar10 = CIFAR10(train_start=train_start,
                      train_end=train_end,
                      test_start=test_start,
                      test_end=test_end)
    x_train, y_train = cifar10.get_set('train')
    x_test, y_test = cifar10.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    # Loop over the samples we want to perturb into adversarial examples
    adv_all = np.zeros((nb_classes, img_rows, img_cols, nchannels), dtype='f')
    sample_all = np.zeros((nb_classes, img_rows, img_cols, nchannels),
                          dtype='f')
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        # grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        #     sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            adv_all[current_class] = adv_x
            sample_all[current_class] = sample

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]
            # Display the original and adversarial images side-by-side
            # if viz_enabled:
            #   figure = pair_visual(
            #       np.reshape(sample, (img_rows, img_cols, nchannels)),
            #       np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # # Add our adversarial example to our grid data
            # grid_viz_data[target, current_class, :, :, :] = np.reshape(
            #     adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Compute the average distortion introduced by the algorithm
    l2_norm = np.mean(np.sum((adv_all - sample_all)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_norm))

    for i in range(nb_classes):
        if noise_output:
            image = adv_all[i] - sample_all[i]
        else:
            image = adv_all[i]
        grid_viz_data[i, 0] = image

    # Close TF session
    sess.close()

    def save_visual(data, path):
        """
    Modified version of cleverhans.plot.pyplot
    """
        import matplotlib.pyplot as plt

        figure = plt.figure()
        # figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for y in range(num_rows):
            for x in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (x + 1) + (y * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[x, y, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[x, y, :, :, :])

        # Draw the plot and return
        plt.savefig(path)

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/jsma_cifar10_noise.png"
        else:
            image_name = "output/jsma_cifar10.png"
        _ = save_visual(grid_viz_data, image_name)

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
  """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {'allow_soft_placement' : True}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Get MNIST data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}
  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }
  jsma_params = {'theta': 1., 'gamma': 0.1,
                 'clip_min': 0., 'clip_max': 1.,
                 'y_target': None}
  rng = np.random.RandomState([2017, 8, 30])

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  if clean_train:
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
      do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng, var_list=model.get_params())

    # Calculate training error
    if testing:
      do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    jsma = SaliencyMapMethod(model, sess=sess)
    adv_jsma_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_logits(adv_jsma_x)

    # Generate JSMA adversarial examples and save to disk
    dir = 'images/jsma_rotated_adv/'
    if not os.path.exists('images'):
        os.mkdir('images')
    if not os.path.exists(dir):
        os.mkdir(dir)
    if not os.path.exists(dir+'train/'):
        os.mkdir(dir+'train/')
    if not os.path.exists(dir+'test/'):
        os.mkdir(dir+'test/')
    for index in range(len(y_test)):
        print('test '+str(index))
        x_ = x_test[index]
        label = np.argmax(y_test[index])
        raw_data = (jsma.generate_np(x_.reshape((1, 28, 28, 1)), **jsma_params).reshape((28, 28)) * 255).astype('uint8')
        im = Image.fromarray(raw_data, mode='P')
        rot = im.rotate(30)
        rot.save(dir + 'test/' + str(label)+'_'+str(uuid.uuid4())+'.png')
    for index in range(len(y_train)):
        print('train ' + str(index))
        x_ = x_train[index]
        label = np.argmax(y_train[index])
        raw_data = (jsma.generate_np(x_.reshape((1, 28, 28, 1)), **jsma_params).reshape((28, 28)) * 255).astype('uint8')
        im = Image.fromarray(raw_data, mode='P')
        rot = im.rotate(30)
        rot.save(dir + 'train/' + str(label)+'_'+str(uuid.uuid4())+'.png')



  return report
def main():
    random_seed = 1024
    train_size, test_size = 55000, 10000
    batch_size = 100
    learning_rate = 0.05
    epochs = 20
    steps = epochs * 550
    sess = tf.Session()
    global_step = tf.Variable(0, name = 'global_step', trainable = False)
    tf.set_random_seed(random_seed)
    saver = tf.train.Saver()

    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label
    noise = tf.placeholder(tf.float32, [None, 100]) # noise vector
    y_target = tf.placeholder(tf.float32, [None, 10]) # target label

    x1 = tf.placeholder(tf.float32, [None, 784])
    x2 = tf.placeholder(tf.float32, [None, 784])
    
    y = classifier(x)

    # gan 
    x_gan = generator(x, noise, 4)
    y_gan = classifier(x_gan)
    
    loss = softmax_loss(label, y)
    loss_gan = - softmax_loss(label, y_gan)

    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    g_vars = [var for var in all_vars if 'generator' in var.name]
    train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss, var_list = c_vars, global_step = global_step)
    train_op_generator = GradientDescentOptimizer(learning_rate = 0.05) \
        .minimize(loss_gan, var_list = g_vars, global_step = global_step)

    #fgsm 
    x_fgsm = attack.fgsm(x, y, eps = 0.2, clip_min=0, clip_max=1)
    y_fgsm = classifier(x_fgsm)
    # jsma
    jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)
    
    # train
    
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    

    y_target_batch = np.zeros((100, 10), dtype=np.float32) 
    y_target_batch[:, 0] = 1.0
    y_target_test = np.zeros((10000, 10), dtype=np.float32) 
    y_target_test[:, 0] = 1.0

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            sess.run(train_op_classifier, feed_dict={x: batch[0], label: batch[1]})
            if t % 550 == 0:
                epoch = int(t / 550)
                acc_benign = sess.run(get_acc(x, label), feed_dict={x: mnist.test.images, label: mnist.test.labels})
                print(epoch, acc_benign)
        
        print('train gan')
        for t in range(1, 550 * 5 + 1):
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1], noise: sample_Z(batch_size, 100), y_target: y_target_batch}
            sess.run(train_op_generator, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
                f_dict = {x: mnist.test.images, label: mnist.test.labels, noise: sample_Z(10000, 100), y_target: y_target_batch}
                x_gan_d = sess.run(x_gan, feed_dict=f_dict)
                f_dict = {x: x_gan_d, label: mnist.test.labels}
                acc_gan = sess.run(get_acc(x, label), feed_dict=f_dict)
                print(epoch, acc_gan)

        checkpoint_path = os.path.join('model', 'basic_model.ckpt')
        #saver.save(sess, checkpoint_path, global_step = 1)


        print('generate adv samples for the first batch of the testing set')
        # real
        x_real_mnist_1 = mnist.test.images[0:100,]
        np.save(os.path.join('data','x_real_mnist_1.npy'), x_real_mnist_1)
        x_real_mnist_1_r = x_real_mnist_1.reshape([100, 28, 28])
        save_images(x_real_mnist_1_r, [10, 10], os.path.join('img', 'x_real_mnist_1.png'))
        # fgsm
        x_fgsm_mnist_1 = sess.run(x_fgsm, feed_dict = {x: mnist.test.images[0:100,], label: mnist.test.labels[0:100,]})
        np.save(os.path.join('data','x_fgsm_mnist_1.npy'), x_fgsm_mnist_1)
        x_fgsm_mnist_1_r = x_fgsm_mnist_1.reshape([100, 28, 28])
        save_images(x_fgsm_mnist_1_r, [10, 10], os.path.join('img', 'x_fgsm_mnist_1.png'))
        #jsma
        jsma_params = {'theta': 1., 'gamma': 0.1,'nb_classes': 10, 'clip_min': 0.,'clip_max': 1., 'targets': y,\
            'y_val': y_target_batch}
        x_jsma_mnist_1 = jsma.generate_np(mnist.test.images[0:100,], **jsma_params)
        np.save(os.path.join('data','x_jsma_mnist_1.npy'), x_jsma_mnist_1)
        acc_jsma_1 = sess.run(get_acc(x, label), feed_dict={x: x_jsma_mnist_1, label: mnist.test.labels[0:100,]})
        x_jsma_mnist_1_r = x_jsma_mnist_1.reshape([100, 28, 28])
        save_images(x_jsma_mnist_1_r, [10, 10], os.path.join('img', 'x_jsma_mnist_1.png'))
        
        x_gan_mnist_1 = sess.run(x_gan, feed_dict={x: mnist.test.images[0:100,] ,label: mnist.test.labels[0:100,]\
            , noise: sample_Z(batch_size, 100), y_target: y_target_batch})
        np.save(os.path.join('data','x_gan_mnist_1.npy'), x_gan_mnist_1)
        x_gan_mnist_1_r = x_gan_mnist_1.reshape([100, 28, 28])
        save_images(x_gan_mnist_1_r, [10, 10], os.path.join('img', 'x_gan_mnist_1.png'))

        diff_fgsm = sess.run(diff(x1, x2), feed_dict={x1: x_real_mnist_1, x2: x_fgsm_mnist_1})
        diff_jsma = sess.run(diff(x1, x2), feed_dict={x1: x_real_mnist_1, x2: x_jsma_mnist_1})
        diff_gan = sess.run(diff(x1, x2), feed_dict={x1: x_real_mnist_1, x2: x_gan_mnist_1})
        print('perturb: fgsm: {:.3f}, jsma: {:.3f}, gan: {:.3f}'.format(diff_fgsm, diff_jsma, diff_gan))

        acc_benign = sess.run(get_acc(x, label), feed_dict={x: mnist.test.images, label: mnist.test.labels})

        print('generate adv samples for the entire testing set')
        # fgsm
        x_fgsm_mnist = sess.run(x_fgsm, feed_dict = {x: mnist.test.images, label: mnist.test.labels})
        np.save(os.path.join('data','x_fgsm_mnist.npy'), x_fgsm_mnist)
        acc_fgsm = sess.run(get_acc(x, label), feed_dict={x: x_fgsm_mnist, label: mnist.test.labels})
        
        # gan
        x_gan_mnist = sess.run(x_gan, feed_dict={x: mnist.test.images ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})
        np.save(os.path.join('data','x_gan_mnist.npy'), x_gan_mnist)
        acc_gan = sess.run(get_acc(x, label), feed_dict={x: x_gan_mnist ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})

        print('accuracy: benign: {:.3f}, fgsm: {:.3f}, jsma: {:.3f}, gan: {:.3f}'.format(acc_benign, acc_fgsm, acc_jsma_1, acc_gan))

        '''
        x_fgsm_mnist = np.load(os.path.join('data','x_fgsm_mnist.npy'))
        x_gan_mnist = np.load(os.path.join('data','x_gan_mnist.npy'))
        x_jsma_mnist_1 = np.load(os.path.join('data','x_jsma_mnist_1.npy'))
        sess.run(tf.global_variables_initializer())
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            sess.run(train_op_classifier, feed_dict={x: batch[0], label: batch[1]})
            if t % 550 == 0:
                epoch = int(t / 550)
                acc = {}
                acc['benign'] = sess.run(get_acc(x, label), feed_dict = {x: mnist.test.images, label: mnist.test.labels})
                acc['pre fgsm'] = sess.run(get_acc(x, label), feed_dict={x: x_fgsm_mnist, label: mnist.test.labels})
                acc['pre gan'] = sess.run(get_acc(x, label), feed_dict={x: x_gan_mnist, label: mnist.test.labels})
                acc['pre jsma 1'] = sess.run(get_acc(x, label), feed_dict={x: x_jsma_mnist_1, label: mnist.test.labels[0:100,]})
                print(epoch, acc)
        '''
        sess.close()
        return
        # jsma
        jsma_params = {'theta': 1., 'gamma': 0.1,'nb_classes': 10, 'clip_min': 0.,'clip_max': 1., 'targets': y,\
            'y_val': y_target_test}
        x_jsma_mnist = jsma.generate_np(mnist.test.images, **jsma_params)
        np.save(os.path.join('data','x_jsma_mnist.npy'), x_jsma_mnist)
    print('--------------------------------------')
    print('Attacking input %i/%i' % (i + 1, len(samples_to_perturb)))
    sample = X_test_scaled[sample_ind: sample_ind+1]

    # We want to find an adversarial example for each possible target class
    # (i.e. all classes that differ from the label given in the dataset)
    current_class = int(np.argmax(y_test[sample_ind]))
    target = 1 - current_class

    print('Generating adv. example for target class %i' % target)

    # This call runs the Jacobian-based saliency map approach
    one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
    one_hot_target[0, target] = 1
    jsma_params['y_target'] = one_hot_target
    adv_x = jsma.generate_np(sample, **jsma_params) # adversarial sample generated = adv_x
    adversarial_samples.append(adv_x)
    samples_perturbed_idxs.append(sample_ind)

    # Check if success was achieved
    adv_tgt = np.zeros((1, FLAGS.nb_classes)) # adversarial target = adv_tgt
    adv_tgt[:,target] = 1
    res = int(model_eval(sess, x, y, predictions, adv_x, adv_tgt, args={'batch_size': 1}))

    # Compute number of modified features
    adv_x_reshape = adv_x.reshape(-1)
    test_in_reshape = X_test_scaled[sample_ind].reshape(-1)
    nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
    percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

    # Update the arrays for later analysis
Beispiel #28
0
def evaluate_model(filepath,
                   attack=None,
                   preprocess=None,
                   batch_size=128,
                   num_threads=None):
    """
  Run evaluation on a saved model
  :param filepath: path to model to evaluate
  :param batch_size: size of evaluation batches
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = get_MNIST_67_preprocess(
        test_attack=attack)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {'batch_size': batch_size}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_set,
                         y_set,
                         save_logit=True,
                         filename=report_key,
                         args=eval_params)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    # Load Model
    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Attack
    if attack == 'fgsm':
        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        # fgsm attack
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        fgsm = FastGradientMethod(model, sess=sess)
        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in xrange(0, len(x_test)):
            sample = x_test[sample_ind:(sample_ind + 1)]
            adv_x = fgsm.generate_np(sample, **fgsm_params)
            x_test[sample_ind:(sample_ind + 1)] = adv_x

    elif attack == 'jsma':
        jsma = SaliencyMapMethod(model, sess=sess)
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }  # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in xrange(0, len(x_test)):
            sample = x_test[sample_ind:(sample_ind + 1)]
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            if y_test[sample_ind, 0] == 1:
                one_hot_target[0, 1] = 1
            else:
                one_hot_target[0, 0] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            x_test[sample_ind:(sample_ind + 1)] = adv_x
        plt.imshow(x_test[1], cmap='gray')
        plt.show()
        exit()
    # Image Process
    x_test = image_process(x_test, preprocess)

    preds = model.get_logits(x)
    fn = str(filepath[10:-7]) + "_" + str(preprocess) + "_" + str(attack)
    do_eval(preds, x_test, y_test, fn, True)
    with open('{}_y.pickle'.format(fn), 'wb') as handle:
        pickle.dump(y_test, handle)
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,verbose=False,
                rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model(x)

    # need this for constructing the array
    sess.run(tf.global_variables_initializer())

    # run this again
    # sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
         #'y': None,
         'max_iterations': 100,
         'learning_rate': 0.1,
         'batch_size': batch_size,
         'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate':10,
                       'overshoot':0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    #7
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # ==> generate 10 targeted classes for every train data regardless
    # This call runs the Jacobian-based saliency map approach
    # Loop over the samples we want to perturb into adversarial examples

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # add normal sample in!!!!
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # saves the output so later no need to re-fun file
    np.savez("jsma_training_data.npz", x_train=X_train_data
             , y_train=Y_train_data)

    # >>> data = np.load('/tmp/123.npz')
    # >>> data['a']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the function against 5 attacks
    # fgsm, base iterative, jsma, elastic net, and deepfool
    def evaluate_against_all():
            # 1 Clean Data
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params)
            print('Legitimate accuracy: %0.4f' % accuracy)

            tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 2 JSMA
            accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                                  Y_test, args=eval_params)

            print('JSMA accuracy: %0.4f' % accuracy)
            tmp = 'JSMA accuracy:'+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 3 FGSM
            accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                                  Y_test, args=eval_params)

            print('FGSM accuracy: %0.4f' % accuracy)
            tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 4 Base Iterative
            accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                                  Y_test, args=eval_params)

            print('Base Iterative accuracy: %0.4f' % accuracy)
            tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 5 Elastic Net
            accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                                  Y_test, args=eval_params)

            print('Elastic Net accuracy: %0.4f' % accuracy)
            tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 6 DeepFool
            accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                                  Y_test, args=eval_params)
            print('DeepFool accuracy: %0.4f' % accuracy)
            tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 7 C & W Attack
            accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                                  Y_test, args=eval_params)
            print('C & W accuracy: %0.4f' % accuracy)
            tmp = 'C & W  accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            # 8 Virtual Adversarial
            accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                                  Y_test, args=eval_params)
            print('VAT accuracy: %0.4f' % accuracy)
            tmp = 'VAT accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                 predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
Beispiel #30
0
    def get_adversarial_version(self,
                                x,
                                y=None,
                                eps=0.3,
                                iterations=100,
                                attack='FGSM',
                                targeted=False,
                                y_tar=None,
                                clip_min=0.0,
                                clip_max=1.0,
                                nb_candidate=10,
                                num_params=100):
        """
        Desc:
            Caclulate the adversarial version for point x using FGSM
            x: matrix of n x input_shape samples
            y: matrix of n x input_label samples
            eps: used for FGSM
            attack: FGMS or CW
        
        """
        if self.dataset == 'cifar10':
            model = KerasModelWrapper(self.model)
        else:
            model = KerasModelWrapper(self.model.model)
        if attack == 'CW-l2':
            K.set_learning_phase(0)
            # Instantiate a CW attack object
            cw = CarliniWagnerL2(model, sess=self.sess)

            cw_params = {
                'batch_size': 10,
                'confidence': 0,
                'learning_rate': 1e-2,
                'binary_search_steps': 5,
                'max_iterations': iterations,
                'abort_early': True,
                'initial_const': 1e-4,
                'clip_min': 0.0,
                'clip_max': 1.0
            }

            x_adv = cw.generate_np(x, **cw_params)

        elif attack == 'CW-l0':
            K.set_learning_phase(0)
            # Instantiate a CW attack object
            cw = CarliniWagnerL0(model, sess=self.sess)

            cw_params = {
                'batch_size': 1,
                'confidence': 0.,
                'learning_rate': 1e-2,
                'binary_search_steps': 5,
                'max_iterations': iterations,
                'abort_early': True,
                'initial_const': 1e-4,
                'clip_min': 0.0,
                'clip_max': 1.0
            }

            x_adv = cw.generate_np(x, **cw_params)

        elif attack == 'DF':
            K.set_learning_phase(0)
            df = DeepFool(model, sess=self.sess)
            df_params = {'nb_candidate': nb_candidate}
            x_adv = df.generate_np(x, **df_params)

        elif attack == 'JSMA':
            K.set_learning_phase(0)
            jsma = SaliencyMapMethod(model, sess=self.sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.03,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }
            x_adv = jsma.generate_np(x, **jsma_params)

        elif attack == 'FGSM':
            K.set_learning_phase(0)
            fgsm = FastGradientMethod(model, sess=self.sess)
            fgsm_params = {
                'eps': 0.15,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }
            x_adv = fgsm.generate_np(x, **fgsm_params)

        elif attack == 'BIM':
            K.set_learning_phase(0)
            fgsm = BasicIterativeMethod(model, sess=self.sess)
            fgsm_params = {
                'eps': 0.015,
                'eps_iter': 0.005,
                'nb_iter': 100,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }
            x_adv = fgsm.generate_np(x, **fgsm_params)

        return x_adv
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, nb_classes=10, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, channels)),
                    np.reshape(adv_x, (img_rows, img_cols, channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
Beispiel #32
0
                          dtype=np.float)
        # sample = np.array([sample])
        sess = keras.backend.get_session()
        model = KerasModelWrapper(model_keras)
        attack = SaliencyMapMethod(model, sess=sess)
        # print(model.predict(panda.reshape(1, *panda.shape)))

        param = dict(
            theta=1.,
            gamma=1.,
            clip_min=0.,
            clip_max=1.,
            y_target=None,
            symbolic_impl=True,
        )
        advs = attack.generate_np(sample, **param)
        # plt.imsave("sample.png", advs[0])

        preb = model_keras.predict(advs).argmax(axis=1).reshape(
            (sample.shape[0], ))
        y_sample = model_keras.predict(sample).argmax(axis=1).reshape(
            (sample.shape[0], ))
        # y_sample = y_sample.reshape((y_sample.shape[0], ))

        success += (preb != y_sample).sum()
        print((preb != y_sample).sum())

        # denoise_adv = model_denoise.predict(advs.reshape(-1, 32, 32, 3))

        # denoise_pred = model_keras.predict(
        #     denoise_adv.reshape(-1, 32 * 32 * 3))