def train(self, nb_filters, label_smoothing):
        """Build the 'model1' CNN with its logits and loss, then fit it.

        The model, its logits and the cross-entropy loss are stored on
        ``self.model``, ``self.preds`` and ``self.loss``.

        :param nb_filters: filter count forwarded to ``ModelBasicCNN``
        :param label_smoothing: smoothing value forwarded to ``CrossEntropy``
        """
        # NOTE(review): `x` and `sess` are not defined in this block --
        # presumably a module-level input placeholder and TF session; confirm.
        # NOTE(review): the `train(...)` call below is expected to resolve to
        # the training helper, not to this method -- confirm the enclosing
        # scope.
        network = ModelBasicCNN('model1', self.nb_classes, nb_filters)
        self.model = network
        self.preds = network.get_logits(x)
        self.loss = CrossEntropy(network, smoothing=label_smoothing)

        fit_options = dict(
            evaluate=self.evaluate,
            args=self.train_params,
            rng=self.range,
            var_list=network.get_params(),
        )
        train(sess, self.loss, self.x_train, self.y_train, **fit_options)
Esempio n. 2
0
def prep_bbox(sess,
              x,
              y,
              x_train,
              y_train,
              x_test,
              y_test,
              nb_epochs,
              batch_size,
              learning_rate,
              rng,
              nb_classes=10,
              img_rows=28,
              img_cols=28,
              nchannels=1):
    """
  Build, train and evaluate the model that plays the role of the "remote"
  black-box oracle.
  :param sess: the TF session
  :param x: the input placeholder
  :param y: the output (label) placeholder
  :param x_train: the training data for the oracle
  :param y_train: the training labels for the oracle
  :param x_test: the testing data for the oracle
  :param y_test: the testing labels for the oracle
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training and evaluation batches
  :param learning_rate: learning rate for training
  :param rng: random state forwarded to ``train``
  :param nb_classes: number of output classes of the oracle
  :param img_rows: not used by this function
  :param img_cols: not used by this function
  :param nchannels: not used by this function
  :return: tuple ``(model, logits, test accuracy)``
  """

    # Black-box graph: a 64-filter basic CNN with a smoothed cross-entropy loss
    oracle = ModelBasicCNN('model1', nb_classes, 64)
    oracle_loss = CrossEntropy(oracle, smoothing=0.1)
    logits = oracle.get_logits(x)
    print("Defined TensorFlow model graph.")

    # Fit the oracle on the provided training split
    fit_args = dict(nb_epochs=nb_epochs,
                    batch_size=batch_size,
                    learning_rate=learning_rate)
    train(sess, oracle_loss, x_train, y_train, args=fit_args, rng=rng)

    # Report the accuracy on the legitimate test split
    test_accuracy = model_eval(sess, x, y, logits, x_test, y_test,
                               args=dict(batch_size=batch_size))
    message = 'Test accuracy of black-box on legitimate test examples: '
    print(message + str(test_accuracy))

    return oracle, logits, test_accuracy
class AdverseCNN:
    """Basic CNN for MNIST whose loss is built with an FGSM attack callable.

    The graph-building methods use names that are not defined in this class:
    ``sess`` and ``x`` (presumably a module-level TF session and input
    placeholder -- confirm), plus the helpers ``train``, ``do_eval``,
    ``MNIST``, ``ModelBasicCNN``, ``CrossEntropy`` and ``FastGradientMethod``.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3, clip_min=0, clip_max=1):
        """Store the hyper-parameters; no data is loaded and no graph is built.

        :param nb_epochs: number of training epochs
        :param batch_size: batch size used for both training and evaluation
        :param learning_rate: learning rate for training
        :param eps: ``eps`` value forwarded to the FGSM attack
        :param clip_min: ``clip_min`` value forwarded to the FGSM attack
        :param clip_max: ``clip_max`` value forwarded to the FGSM attack
        """
        self.train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        self.eval_params = {'batch_size': batch_size}
        self.fgsm_params = {
            'eps': eps,
            'clip_min': clip_min,
            'clip_max': clip_max
        }

        # Filled in by get_data()
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None
        self.img_rows = None
        self.img_cols = None
        self.nchannels = None
        self.nb_classes = None

        # Fixed seed so that the random state passed to train() is repeatable
        self.range = np.random.RandomState([2019, 11, 25])

        # Filled in by adverse_train()
        self.model = None
        self.preds = None
        self.loss = None
        self.preds_adv = None

    def get_data(self, train_start, train_end, test_start, test_end):
        """Load the MNIST train/test splits and derive image and label sizes.

        :param train_start: index of first training set example
        :param train_end: index of last training set example
        :param test_start: index of first test set example
        :param test_end: index of last test set example
        """
        mnist = MNIST(train_start=train_start, train_end=train_end,
                      test_start=test_start, test_end=test_end)
        self.x_train, self.y_train = mnist.get_set('train')
        self.x_test, self.y_test = mnist.get_set('test')
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = self.y_train.shape[1]

    def adverse_train(self, nb_filters, label_smoothing):
        """Build the 'model2' CNN with an FGSM-aware loss and train it.

        Stores the model, its logits on ``x``, its logits on the FGSM
        perturbation of ``x`` and the loss on the instance.

        :param nb_filters: filter count forwarded to ``ModelBasicCNN``
        :param label_smoothing: smoothing value forwarded to ``CrossEntropy``
        """
        self.model = ModelBasicCNN('model2', self.nb_classes, nb_filters)
        fgsm = FastGradientMethod(self.model, sess=sess)

        def attack(x):
            return fgsm.generate(x, **self.fgsm_params)

        self.preds = self.model.get_logits(x)
        self.loss = CrossEntropy(self.model, smoothing=label_smoothing, attack=attack)

        adv_x = attack(x)
        self.preds_adv = self.model.get_logits(adv_x)

        train(sess, self.loss, self.x_train, self.y_train, evaluate=self.evaluate,
              args=self.train_params, rng=self.range, var_list=self.model.get_params())

    def evaluate(self):
        """Evaluate clean and adversarial logits on the test split."""
        do_eval(self.preds, self.eval_params, self.x_test, self.y_test, 'adv_train_clean_eval', False)
        do_eval(self.preds_adv, self.eval_params, self.x_test, self.y_test, 'adv_train_adv_eval', True)

    def test(self):
        """Evaluate clean and adversarial logits on the training split."""
        do_eval(self.preds, self.eval_params, self.x_train, self.y_train, 'train_adv_train_clean_eval')
        do_eval(self.preds_adv, self.eval_params, self.x_train, self.y_train, 'train_adv_train_adv_eval')
Esempio n. 5
0
    def prepblackbox(self, nb_filters):
        """Build, train and evaluate the 'model1' black-box CNN.

        :param nb_filters: filter count forwarded to ``ModelBasicCNN``
        :return: tuple ``(model, logits, test accuracy)``
        """
        # NOTE(review): `sess`, `x` and `y` are not defined in this method --
        # presumably a module-level TF session and placeholders; confirm.
        model = ModelBasicCNN('model1', self.nb_classes, nb_filters)
        loss = CrossEntropy(model, smoothing=0.1)
        predictions = model.get_logits(x)
        print("Defined TensorFlow model graph.")

        train(sess, loss, self.x_train, self.y_train, args=self.train_params,
              rng=self.range)

        # There is no `batch_size` name in this scope; reuse the batch size
        # that was just used for training.
        eval_params = {'batch_size': self.train_params['batch_size']}
        accuracy = model_eval(sess, x, y, predictions, self.x_test, self.y_test,
                              args=eval_params)
        print('Test accuracy of black-box on legitimate test '
              'examples: ' + str(accuracy))

        return model, predictions, accuracy
Esempio n. 6
0
def __test():
    """Train (or restore) the clean 'model1' MNIST CNN and print its accuracy.

    A checkpoint at ``MODEL_PATH`` is loaded when its ``.meta`` file exists;
    otherwise the model is trained and saved there.
    """
    # Split boundaries are named so the size check below can refer to them
    train_start, train_end = 0, 60000
    test_start, test_end = 0, 10000

    tf.set_random_seed(1234)
    sess = tf.Session()
    set_log_level(logging.DEBUG)

    # Get MNIST data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")
    # Train an MNIST model
    train_params = {
        'nb_epochs': NB_EPOCHS,
        'batch_size': BATCH_SIZE,
        'learning_rate': LEARNING_RATE,
        'filename': os.path.split(MODEL_PATH)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(MODEL_PATH + ".meta"):
        tf_model_load(sess, MODEL_PATH)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, MODEL_PATH)
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': BATCH_SIZE}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    # Nothing outside this function holds the session, so release it here
    sess.close()
Esempio n. 7
0
def mnist_tutorial_adv_train(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000,
                             viz_enabled=VIZ_ENABLED,
                             nb_epochs=NB_EPOCHS,
                             batch_size=BATCH_SIZE,
                             source_samples=SOURCE_SAMPLES,
                             learning_rate=LEARNING_RATE,
                             attack_iterations=ATTACK_ITERATIONS,
                             model_path=MODEL_PATH,
                             targeted=TARGETED,
                             noise_output=NOISE_OUTPUT):
    """
  MNIST tutorial: FGSM / BIM / MIM attacks on a clean model, followed by
  FGSM adversarial training of a second model.
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) when true, attack one test example per class
                      (requires source_samples == number of classes)
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: not used by this function
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :param noise_output: not used by this function
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    clean_eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                          args=clean_eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using FGSM - BIM - MIM approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate the three attack objects against the clean model
    fgsm = FastGradientMethod(model, sess=sess)
    bim = BasicIterativeMethod(model, sess=sess)
    mim = MomentumIterativeMethod(model, sess=sess)

    # Pick the source examples: one per class when visualising, otherwise the
    # first `source_samples` test examples.
    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        source_x, source_y = x_test[idxs], y_test[idxs]
    else:
        source_x = x_test[:source_samples]
        source_y = y_test[:source_samples]

    if targeted:
        # Repeat every source example once per class and pair each copy with
        # a different one-hot target.
        adv_inputs = np.array([[instance] * nb_classes
                               for instance in source_x],
                              dtype=np.float32)
        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([np.eye(nb_classes)] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        # The targets must actually reach the attacks, otherwise the run is
        # untargeted but scored against targets.
        target_kwargs = {'y_target': adv_ys}
        eval_labels = adv_ys
    else:
        adv_inputs = source_x
        target_kwargs = {}
        eval_labels = source_y

    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    iterative_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    attacks = (
        ('FGSM', fgsm, fgsm_params),
        ('BIM', bim, iterative_params),
        ('MIM', mim, iterative_params),
    )

    adv_examples = []
    for label, attack, params in attacks:
        call_params = dict(params)
        call_params.update(target_kwargs)
        adv_examples.append(
            (label, attack.generate_np(adv_inputs, **call_params)))

    adv_eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}

    def attack_success_rate(logits, adv_x):
        # Targeted: success means the target label is predicted.
        # Untargeted: success means the true label is no longer predicted.
        acc = model_eval(sess, x, y, logits, adv_x, eval_labels,
                         args=adv_eval_params)
        return acc if targeted else 1 - acc

    success_rates = [(label, attack_success_rate(preds, adv_x))
                     for label, adv_x in adv_examples]

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    for label, rate in success_rates:
        print('Avg. rate of successful adv. ({0}) examples {1:.4f}'.format(
            label, rate))
        setattr(report, 'clean_train_adv_{0}_eval'.format(label.lower()),
                1. - rate)

    # Compute the average distortion introduced by the algorithm
    for label, adv_x in adv_examples:
        l2_norm = np.mean(
            np.sum((adv_x - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of ({0}) perturbations {1:.4f}'.format(
            label, l2_norm))

    ###########################################################################
    # Adversarial Training
    ###########################################################################

    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

    # The training-time attack has to target the model being trained
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack_fgsm(x):
        # Perturb the batch handed in by the loss, not a fixed numpy array
        return fgsm2.generate(x, **fgsm_params)

    preds2 = model2.get_logits(x)
    loss2_fgsm = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)
    print("Defined TensorFlow model graph.")

    train(sess, loss2_fgsm, x_train, y_train, args=train_params, rng=rng,
          var_list=model2.get_params())

    accuracy = model_eval(sess, x, y, preds2, x_test, y_test,
                          args=clean_eval_params)
    print('Test accuracy of the FGSM adversarially trained model on '
          'legitimate test examples: {0}'.format(accuracy))
    # Keep the clean model's result; store this one under its own key
    report.adv_train_clean_eval = accuracy

    # Re-score the previously crafted examples against the new model
    for label, adv_x in adv_examples:
        rate = attack_success_rate(preds2, adv_x)
        print('Avg. rate of successful adv. ({0}) examples after adversarial '
              'training {1:.4f}'.format(label, rate))
        setattr(report, 'adv_train_adv_{0}_eval'.format(label.lower()),
                1. - rate)

    # Close TF session
    sess.close()

    return report
Esempio n. 8
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial: train a clean model and dump FGSM adversarial
  versions of the test and training images as PNG files under
  images/fgsm_adv/.
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: when false, nothing is trained or written and an
                      empty report is returned
  :param testing: if true, also record the accuracy on the training set
  :param backprop_through_attack: not used by this function
  :param nb_filters: number of filters of the CNN
  :param num_threads: when truthy, the session is limited to one intra-op
                      thread
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        # NOTE(review): the value of num_threads is not forwarded; any truthy
        # value pins the session to a single thread -- confirm intent.
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {'allow_soft_placement': True}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    def save_adversarial_images(attack, images, labels, out_dir, tag):
        # Write one PNG per example, named "<label>_<uuid>.png"
        for index, (image, one_hot) in enumerate(zip(images, labels)):
            print(tag + ' ' + str(index))
            label = np.argmax(one_hot)
            batch = image.reshape((1, img_rows, img_cols, nchannels))
            adv = attack.generate_np(batch, **fgsm_params)
            # Single-channel images only: the reshape fails for nchannels > 1
            raw_data = (adv.reshape((img_rows, img_cols)) *
                        255).astype('uint8')
            im = Image.fromarray(raw_data, mode='P')
            im.save(os.path.join(
                out_dir, str(label) + '_' + str(uuid.uuid4()) + '.png'))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object
        fgsm = FastGradientMethod(model, sess=sess)

        # Generate fgsm adversarial examples and save to disk
        out_root = os.path.join('images', 'fgsm_adv')
        test_dir = os.path.join(out_root, 'test')
        train_dir = os.path.join(out_root, 'train')
        for folder in (train_dir, test_dir):
            if not os.path.exists(folder):
                os.makedirs(folder)

        save_adversarial_images(fgsm, x_test, y_test, test_dir, 'test')
        save_adversarial_images(fgsm, x_train, y_train, train_dir, 'train')

    # Nothing outside this function holds the session, so release it here
    sess.close()

    return report
def mnist_tutorial_fgsm(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED,
                      noise_output=NOISE_OUTPUT):
  """
  MNIST tutorial for the Fast Gradient Method attack, followed by
  adversarial training of a second model against the same attack.
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) save a grid image of the adversarial
                      examples; requires source_samples == nb_classes
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: not used by this attack; accepted so the
                            signature stays compatible with callers
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :param noise_output: (boolean) when visualizing, store the perturbation
                       (adversarial minus original) instead of the
                       adversarial image itself
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST train and test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  nb_filters = 64

  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }

  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  if os.path.exists(model_path + ".meta"):
    tf_model_load(sess, model_path)
  else:
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, model_path)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using the Fast Gradient Method
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a FGSM attack object
  fgsm = FastGradientMethod(model, sess=sess)

  # Pick the clean inputs to attack: for visualization, the first test
  # example of every class; otherwise the first source_samples test inputs.
  if viz_enabled:
    assert source_samples == nb_classes
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
    clean_inputs = x_test[idxs]
    clean_labels = y_test[idxs]
  else:
    clean_inputs = x_test[:source_samples]
    clean_labels = y_test[:source_samples]

  if targeted:
    grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)

    # Repeat every source image once per target class ...
    adv_inputs = np.array(
        [[instance] * nb_classes for instance in clean_inputs],
        dtype=np.float32)
    adv_inputs = adv_inputs.reshape(
        (source_samples * nb_classes, img_rows, img_cols, nchannels))

    # ... and pair the copies with the one-hot targets 0..nb_classes-1.
    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
    # Consecutive source images are nb_classes rows apart in adv_inputs.
    source_stride = nb_classes
  else:
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    adv_inputs = clean_inputs
    adv_ys = None
    source_stride = 1

  if viz_enabled:
    # Initialize our array for grid visualization
    grid_viz_data = np.zeros(grid_shape, dtype='f')

  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  # Only the crafting call receives the targets; fgsm_params itself stays
  # target-free because it is reused on training batches further below.
  craft_params = dict(fgsm_params)
  if targeted:
    craft_params['y_target'] = adv_ys

  adv = fgsm.generate_np(adv_inputs, **craft_params)

  adv_eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}

  def attack_success_rate(logits):
    """Rate at which `adv` fools the model that produced `logits`."""
    if targeted:
      # Success means being classified as the requested target class.
      return model_eval(sess, x, y, logits, adv, adv_ys,
                        args=adv_eval_params)
    # Success means no longer being classified as the true class.
    return 1 - model_eval(sess, x, y, logits, adv, clean_labels,
                          args=adv_eval_params)

  adv_accuracy = attack_success_rate(preds)

  if viz_enabled:
    for i in range(nb_classes):
      # Row of the i-th source image. Untargeted runs hold one row per
      # source, so the stride must be 1 there to stay within bounds.
      row = i * source_stride
      if noise_output:
        image = adv[row] - adv_inputs[row]
      else:
        image = adv[row]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  ###########################################################################
  # Adversarial Training
  ###########################################################################

  model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

  fgsm2 = FastGradientMethod(model2, sess=sess)

  def attack_fgsm(x):
    # Attack the batch handed in by the loss, not a fixed array, so the
    # adversarial examples line up with the labels of that batch.
    return fgsm2.generate(x, **fgsm_params)

  preds2 = model2.get_logits(x)
  loss2 = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)

  train(sess, loss2, x_train, y_train, args=train_params, rng=rng)
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  # Despite the wording of the message, x_test holds legitimate examples:
  # this is the clean accuracy of the adversarially trained model.
  print('Test accuracy on adversarial fgsm test examples: {0}'.format(accuracy))
  report.adv_train_clean_eval = accuracy
  print("Defined TensorFlow model graph.")

  # Re-score the adversarial examples crafted above against the
  # adversarially trained model.
  adv_accuracy = attack_success_rate(preds2)

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.adv_train_adv_eval = 1. - adv_accuracy

  # The examples are unchanged, so this repeats the distortion printed above
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  # Close TF session
  sess.close()

  def save_visual(data, path):
    """
    Modified version of cleverhans.plot.pyplot: draw `data` as a grid of
    images and write the figure to `path`.
    """
    figure = plt.figure()

    # Add the images to the plot
    num_cols = data.shape[0]
    num_rows = data.shape[1]
    num_channels = data.shape[4]
    for row in range(num_rows):
      for col in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (col + 1) + (row * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(data[col, row, :, :, 0], cmap='gray')
        else:
          plt.imshow(data[col, row, :, :, :])

    # Make sure the destination directory exists before saving into it
    out_dir = os.path.dirname(path)
    if out_dir and not os.path.isdir(out_dir):
      os.makedirs(out_dir)

    # Draw the plot and return
    plt.savefig(path)
    return figure

  # Finally, save a grid of all the adversarial examples
  if viz_enabled:
    if noise_output:
      image_name = "output/fgsm_mnist_noise.png"
    else:
      image_name = "output/fgsm_mnist.png"
    _ = save_visual(grid_viz_data, image_name)

  return report
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED):
    """
    Run the Carlini and Wagner L2 attack tutorial on MNIST.

    A basic CNN is loaded from `model_path` when a checkpoint exists there
    and trained (then saved) otherwise. Adversarial examples are crafted
    from test inputs, the attack success rate and the mean L2 distortion
    are printed, and optionally a grid of the results is displayed.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples;
                        requires source_samples == nb_classes
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: value passed to the attack as max_iterations
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Accuracies collected along the way are returned through this object
    report = AccuracyReport()

    # Fixed graph-level seed for reproducibility
    tf.set_random_seed(1234)

    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Load the requested MNIST train / test slices
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Image geometry and class count come from the data itself
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    image_dims = (img_rows, img_cols, nchannels)
    nb_classes = y_train.shape[1]

    # Input and label placeholders
    x = tf.placeholder(tf.float32, shape=(None,) + image_dims)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Model graph: basic CNN with 64 filters and label-smoothed loss
    model = ModelBasicCNN('model1', nb_classes, 64)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    #######################################################################
    # Obtain a trained model: restore it if possible, else train and save
    #######################################################################
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(model_path + ".meta"):
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'filename': os.path.split(model_path)[-1]
        }
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        tf.train.Saver().save(sess, model_path)
    else:
        tf_model_load(sess, model_path)

    # Accuracy on legitimate test examples
    accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                          args={'batch_size': batch_size})
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    #######################################################################
    # Craft adversarial examples with the Carlini and Wagner attack
    #######################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    cw = CarliniWagnerL2(model, sess=sess)

    # Clean inputs to attack: one test example per class when plotting,
    # otherwise simply the first source_samples test inputs.
    if viz_enabled:
        assert source_samples == nb_classes
        test_digits = np.argmax(y_test, axis=1)
        idxs = [np.where(test_digits == digit)[0][0]
                for digit in range(nb_classes)]
        clean_inputs = x_test[idxs]
    else:
        clean_inputs = x_test[:source_samples]

    if targeted:
        nb_crafted = source_samples * nb_classes
        grid_shape = (nb_classes, nb_classes) + image_dims

        # Every source image appears nb_classes times in a row ...
        adv_inputs = np.repeat(clean_inputs, nb_classes, axis=0)
        adv_inputs = adv_inputs.astype(np.float32).reshape(
            (nb_crafted,) + image_dims)
        # ... each copy paired with a different one-hot target class.
        adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32),
                         (source_samples, 1))
        yname = "y_target"
    else:
        nb_crafted = source_samples
        # Two columns: the clean input and its adversarial counterpart
        grid_shape = (nb_classes, 2) + image_dims
        adv_inputs = clean_inputs
        adv_ys = None
        yname = "y"

    if viz_enabled:
        # Buffer holding the images for the grid visualization
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': nb_crafted,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        # Success: the model outputs the requested target class
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys,
                                  args=eval_params)
    else:
        # Success: the model no longer outputs the true class
        true_labels = y_test[idxs] if viz_enabled else y_test[:source_samples]
        adv_accuracy = 1 - model_eval(sess, x, y, preds, adv, true_labels,
                                      args=eval_params)

    if viz_enabled:
        if targeted:
            for i in range(nb_classes):
                for j in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
        else:
            for j in range(nb_classes):
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Rate at which adversarial examples were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Mean L2 norm of the perturbations
    squared_dist = np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))
    percent_perturbed = np.mean(squared_dist**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
Esempio n. 11
0
import argparse
import tensorflow as tf
from cleverhans.dataset import MNIST
from cleverhans.attacks import FastGradientMethod
from cleverhans.model_zoo.basic_cnn import ModelBasicCNN

# Command-line driver: build a basic CNN, wrap it in the selected attack
# and print every MNIST training image next to its adversarial version.
parser = argparse.ArgumentParser()
parser.add_argument('--attack', required=True, choices=['fgsm'])

args = parser.parse_args()
sess = tf.Session(config=tf.ConfigProto())

if args.attack == 'fgsm':
    model = ModelBasicCNN('model1', 10, 64)
    attack = FastGradientMethod(model, sess=sess)

# No checkpoint is restored in this script, so the model's variables must be
# initialized before the attack graph can be run in the session.
# NOTE(review): the model is therefore untrained -- load trained weights here
# if meaningful adversarial examples are wanted.
sess.run(tf.global_variables_initializer())

mnist = MNIST(train_start=0, train_end=60000)
x_train, y_train = mnist.get_set('train')

# The attack expects its target as a batch of one-hot rows, not a class
# index: build a single row of the same width/dtype as the labels, aiming at
# class 0.
y_target = y_train[:1].copy()
y_target[:] = 0
y_target[0, 0] = 1

fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
               'y_target': y_target}

for i in range(len(x_train)):
    # generate_np takes NumPy input with a leading batch axis and returns
    # NumPy output; generate() would only add symbolic ops to the graph.
    new_x = attack.generate_np(x_train[i:i + 1], **fgsm_params)[0]
    print(x_train[i])
    print(new_x)
    print('\n')
Esempio n. 12
0
def main(argv=None):
  """
  Train (or restore) a classifier for CONFIG.DATASET, attack the second half
  of the test set with Carlini-Wagner, then attack both the untouched first
  half and the once-attacked images a second time, and hand the resulting
  logits and perturbation distances to the analysis/plot helpers.

  Intermediate arrays are cached as .npy files under CONFIG.SAVE_PATH and
  reused on later runs; accuracies are appended to acc_param.txt there.

  :param argv: unused; present for the tf.app.run-style entry point
  :return: an AccuracyReport object
  :raises ValueError: if CONFIG.DATASET is not one of the handled datasets
  """
  from cleverhans_tutorials import check_installation
  check_installation(__file__)

  save_path_data = CONFIG.SAVE_PATH + 'data/'
  model_path = CONFIG.SAVE_PATH + '../all/' + CONFIG.DATASET + '/'
  # Create every output directory independently: model_path may already
  # exist from an earlier run while its data/ sub-directory does not.
  for directory in (CONFIG.SAVE_PATH, save_path_data, model_path,
                    model_path + 'data/'):
    if not os.path.exists(directory):
      os.makedirs(directory)

  nb_epochs = FLAGS.nb_epochs
  batch_size = FLAGS.batch_size
  learning_rate = FLAGS.learning_rate
  nb_filters = FLAGS.nb_filters
  # The test set is split in half: [:len_x] stays clean for the first
  # attack, [len_x:] is attacked once.
  len_x = int(CONFIG.NUM_TEST/2)

  start = time.time()

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set seeds to improve reproducibility
  if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
    tf.set_random_seed(1234)
    np.random.seed(1234)
    rd.seed(1234)
  elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
    tf.set_random_seed(13)
    np.random.seed(1234)
    rd.seed(0)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
  tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2
  sess = tf.Session(config=tf_config)

  if CONFIG.DATASET == 'mnist':
    # Get MNIST data
    mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
  elif CONFIG.DATASET == 'cifar10':
    # Get CIFAR10 data
    data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    dataset_size = data.x_train.shape[0]
    # Augmented tf.data pipeline used for training only
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')
  elif CONFIG.DATASET == 'moon':
    # Create a two moon example
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)
    x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                   y_train1, x_test1,
                                                                   y_test1)
  elif CONFIG.DATASET == 'dims':
    # Two moons again, afterwards passed through add_noise_and_QR with
    # CONFIG.NUM_DIMS
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)
    x_train2, y_train, x_test2, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                     y_train1,x_test1,
                                                                     y_test1)
    x_train, x_test = add_noise_and_QR(x_train2, x_test2, CONFIG.NUM_DIMS)
  else:
    # Fail with a clear message instead of a NameError further down
    raise ValueError('Unsupported CONFIG.DATASET: %r' % (CONFIG.DATASET,))

  np.save(os.path.join(save_path_data, 'x_test'), x_test)
  np.save(os.path.join(save_path_data, 'y_test'), y_test)

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train a model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': 1}
  rng = np.random.RandomState([2017, 8, 30])

  with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

    def do_eval(adv_x, preds, x_set, y_set, report_key):
      """Evaluate on (x_set, y_set), record the accuracy, return arrays.

      NOTE(review): model_eval here is a project variant taking adv_x and
      nb_classes and returning three values -- confirm against its
      definition.
      """
      acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x, nb_classes, x_set,
                                          y_set, args=eval_params)
      setattr(report, report_key, acc)
      if report_key:
        print('Accuracy on %s examples: %0.4f' % (report_key, acc), file=fi)
      return pred_np, adv_x_np

    if CONFIG.DATASET == 'mnist':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelBasicCNN('model1', nb_classes, nb_filters)
    elif CONFIG.DATASET == 'cifar10':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[32, 32, 3])
    elif CONFIG.DATASET == 'moon':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelMLP('model1', nb_classes)
    elif CONFIG.DATASET == 'dims':
      # Unlike the other datasets, this checkpoint lives in the run's own
      # data directory
      trained_model_path = save_path_data + 'trained_model'
      model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)

    def evaluate():
      _, _ = do_eval(x, preds, x_test, y_test, 'test during train')

    # Restore a previously trained model if there is one, otherwise train
    # and save
    if os.path.isfile( trained_model_path + '.index' ):
      tf_model_load(sess, trained_model_path)
    else:
      if CONFIG.DATASET == 'mnist':
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'cifar10':
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())
      elif CONFIG.DATASET == 'moon':
        train_2d(sess, loss, x, y, x_train, y_train, save=False, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'dims':
        train_2d(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      saver = tf.train.Saver()
      saver.save(sess, trained_model_path)

    # Evaluate the accuracy on test examples
    if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
      logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
    else:
      _, _ = do_eval(x, preds, x_train, y_train, 'train')
      logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
      np.save(os.path.join(save_path_data, 'logits_zero_attacked'), logits_0)

    if CONFIG.DATASET == 'moon':
      # Logits on a dense grid over [0, 1]^2, cached next to the model
      num_grid_points = 5000
      if os.path.isfile( model_path + 'data/images_mesh' + str(num_grid_points) + '.npy' ):
        x_mesh = np.load(model_path + 'data/images_mesh' + str(num_grid_points) + '.npy')
        logits_mesh = np.load(model_path + 'data/logits_mesh' + str(num_grid_points) + '.npy')
      else:
        xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points), np.linspace(0, 1, num_grid_points))
        x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
        y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
        x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(model_path, x_mesh1, y_mesh1)
        logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
        x_mesh = np.squeeze(x_mesh)
        np.save(os.path.join(model_path, 'data/images_mesh'+str(num_grid_points)), x_mesh)
        np.save(os.path.join(model_path, 'data/logits_mesh'+str(num_grid_points)), logits_mesh)

    # First half stays clean until the counter attack; second half is
    # attacked once
    points_x = x_test[:len_x]
    points_y = y_test[:len_x]
    points_x_bar = x_test[len_x:]
    points_y_bar = y_test[len_x:]

    # Initialize the CW attack object and graph
    cw = CarliniWagnerL2(model, sess=sess)

    # first attack
    attack_params = {
        'learning_rate': CONFIG.CW_LEARNING_RATE,
        'max_iterations': CONFIG.CW_MAX_ITERATIONS
      }

    if CONFIG.DATASET == 'moon':

      out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
      attack_params['const_a_min'] = out_a
      attack_params['const_a_max'] = 100

    adv_x = cw.generate(x, **attack_params)

    if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
      adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
      logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
    else:
      #Evaluate the accuracy on adversarial examples
      preds_adv = model.get_logits(adv_x)
      logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar, points_y_bar,
                                    'test once attacked')
      np.save(os.path.join(save_path_data, 'images_once_attacked'), adv_img_1)
      np.save(os.path.join(save_path_data, 'logits_once_attacked'), logits_1)

    # counter attack
    attack_params['max_iterations'] = 1024

    if CONFIG.DATASET == 'moon':

      out_alpha2 = compute_epsilons_balls_alpha(x_mesh, np.squeeze(x_test),
                                                np.squeeze(adv_img_1), model_path,
                                                CONFIG.SAVE_PATH)
      attack_params['learning_rate'] = out_alpha2
      attack_params['const_a_min'] = -1
      attack_params['max_iterations'] = 2048

      plot_data(np.squeeze(adv_img_1), logits_1, CONFIG.SAVE_PATH+'data_pred1.png', x_mesh,
                logits_mesh)

    adv_adv_x = cw.generate(x, **attack_params)

    # Inputs of the counter attack: clean first half followed by the
    # once-attacked images, labelled with the clean labels and the logits
    # of the first attack respectively
    x_k = np.concatenate((points_x, adv_img_1), axis=0)
    y_k = np.concatenate((points_y, logits_1), axis=0)

    if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
      adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
      logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
    else:
      # Evaluate the accuracy on adversarial examples
      preds_adv_adv = model.get_logits(adv_adv_x)
      logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                    'test twice attacked')

      np.save(os.path.join(save_path_data, 'images_twice_attacked'), adv_img_2)
      np.save(os.path.join(save_path_data, 'logits_twice_attacked'), logits_2)

    if CONFIG.DATASET == 'moon':
      plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
      plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
      test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0, logits_1, logits_2,
                 CONFIG.SAVE_PATH)

  compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:], logits_0[:len_x],
                    logits_2[:len_x], CONFIG.SAVE_PATH)

  # Axes to sum over when computing per-example L2 distances
  if x_test.shape[-1] > 1:
    num_axis=(1,2,3)
  else:
    num_axis=(1,2)

  # D_p: distance moved by clean points under the counter attack;
  # D_p_p: distance moved by once-attacked points under the counter attack
  D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]), axis=num_axis)))
  D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1-adv_img_2[len_x:]),
                                    axis=num_axis)))
  D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1, logits_2[len_x:],
                                logits_0[:len_x], logits_2[:len_x])

  # Length check instead of `!= []`: that comparison is element-wise (and
  # ambiguous or an error) should modify_D return NumPy arrays
  if len(D_p_mod) > 0 and len(D_p_p_mod) > 0:
    plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)

  plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)

  print('Time needed:', time.time()-start)

  return report
Esempio n. 13
0
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1):
  """
  Train a basic CNN on Fashion-MNIST, attack it, then adversarially train.

  Despite the function name, the data loaded here is Fashion-MNIST (via
  ``keras.datasets.fashion_mnist``) and the attack used throughout is
  ``MomentumIterativeMethod``.

  :param train_start: index of first training set example
  :param train_end: end index (exclusive) of the training set slice
  :param test_start: index of first test set example
  :param test_end: end index (exclusive) of the test set slice
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param nb_filters: filter count passed to ModelBasicCNN
  :param num_threads: if truthy, the session is limited to a single
                      intra-op thread (the numeric value itself is not used)
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Get Fashion-MNIST data
  data = keras.datasets.fashion_mnist
  (x_train, y_train), (x_test, y_test) = data.load_data()

  # Honour the requested sub-ranges; with the default arguments this keeps
  # the full 60000/10000 split.
  x_train = x_train[train_start:train_end]
  y_train = y_train[train_start:train_end]
  x_test = x_test[test_start:test_end]
  y_test = y_test[test_start:test_end]

  # Add the channel axis, one-hot encode the labels and scale pixels to [0, 1]
  x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
  x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
  y_train = np_utils.to_categorical(y_train, 10)
  y_test = np_utils.to_categorical(y_test, 10)
  x_train = x_train.astype('float32')
  x_test = x_test.astype('float32')
  x_train /= 255
  x_test /= 255

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Training, evaluation and attack settings
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}
  attack_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }
  rng = np.random.RandomState([2017, 8, 30])

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    # Store the accuracy on the report; print it only when the caller says
    # whether the inputs are adversarial or legitimate.
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  if clean_train:
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
      do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)
    train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng)

    # Calculate training error
    if testing:
      do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    # Initialize the attack object and its graph
    mim = MomentumIterativeMethod(model, sess=sess)
    adv_x = mim.generate(x, **attack_params)
    preds_adv = model.get_logits(adv_x)

    # Evaluate the accuracy of the clean model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

    # Calculate training error
    if testing:
      do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    print('Repeating the process, using adversarial training')

  # Create a new model and train it against the same attack
  model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
  mim2 = MomentumIterativeMethod(model2, sess=sess)

  def attack(x):
    return mim2.generate(x, **attack_params)

  loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
  preds2 = model2.get_logits(x)
  adv_x2 = attack(x)

  if not backprop_through_attack:
    # Treat the adversarial examples as constants so that no gradient flows
    # back through their construction.
    adv_x2 = tf.stop_gradient(adv_x2)
  preds2_adv = model2.get_logits(adv_x2)

  def evaluate2():
    # Accuracy of adversarially trained model on legitimate test inputs
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    # Accuracy of the adversarially trained model on adversarial examples
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

  # Perform and evaluate adversarial training
  train(sess, loss2, x_train, y_train, evaluate=evaluate2, args=train_params, rng=rng)

  # Calculate training errors
  if testing:
    do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
    do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

  return report
Esempio n. 14
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    Train a basic CNN on the data sets selected through FLAGS.

    Data comes from ``get_train(FLAGS.train1)`` / ``get_test(FLAGS.test1)``;
    when ``FLAGS.train2`` is set, a second data set is combined with the
    first through ``zip_trains`` / ``zip_tests``.

    :param train_start: not referenced in this function
    :param train_end: not referenced in this function
    :param test_start: not referenced in this function
    :param test_end: not referenced in this function
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param clean_train: when false, nothing is trained and an empty report
                        is returned
    :param testing: if true, also record accuracy on the training set
    :param backprop_through_attack: not referenced in this function
    :param nb_filters: filter count passed to ModelBasicCNN
    :param num_threads: if truthy, the session uses one intra-op thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    report = AccuracyReport()

    # Seed the graph and enable debug logging before anything is built
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    session_kwargs = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_kwargs))

    # Primary data set
    x_train1, y_train1 = get_train(FLAGS.train1)
    x_test1, y_test1 = get_test(FLAGS.test1)

    if FLAGS.train2:
        # NOTE(review): get_train is unpacked into two values above but into
        # four here -- confirm what it returns for FLAGS.train2.
        x_train2, y_train2, x_test2, y_test2 = get_train(FLAGS.train2)
        x_train, y_train = zip_trains(x_train1, y_train1,
                                      x_train2, y_train2, 0.5)
        x_test, y_test = zip_tests(x_test1, y_test1, x_test2, y_test2, 0.5)
    else:
        x_train, y_train = x_train1, y_train1
        x_test, y_test = x_test1, y_test1

    # Placeholder shapes are taken from the training data
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    input_shape = (None, img_rows, img_cols, nchannels)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    print(x)
    print(y)

    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    eval_params = dict(batch_size=batch_size)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Record the accuracy on the report; only print it when the caller
        # states whether the inputs are adversarial.
        accuracy = model_eval(sess, x, y, preds, x_set, y_set,
                              args=eval_params)
        setattr(report, report_key, accuracy)
        if is_adv is None:
            return
        example_kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (example_kind, accuracy))

    if not clean_train:
        return report

    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    train(sess, loss, x_train, y_train,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params())

    # Accuracy on the training set itself, recorded for unit tests
    if testing:
        do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
Esempio n. 15
0
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
  """
  MNIST tutorial for the Jacobian-based saliency map approach (JSMA)

  Trains a basic CNN on MNIST, then for each of the first ``source_samples``
  test inputs crafts one targeted adversarial example per other class and
  prints success and perturbation statistics.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  nb_filters = 64
  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  sess.run(tf.global_variables_initializer())
  rng = np.random.RandomState([2017, 8, 30])
  train(sess, loss, x_train, y_train, args=train_params, rng=rng)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using the Jacobian-based saliency map approach
  ###########################################################################
  print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
        ' adversarial examples')

  # Keep track of success (adversarial example classified in target)
  results = np.zeros((nb_classes, source_samples), dtype='i')

  # Rate of perturbed features for each test set example and target class
  perturbations = np.zeros((nb_classes, source_samples), dtype='f')

  # Initialize our array for grid visualization
  grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
  grid_viz_data = np.zeros(grid_shape, dtype='f')

  # Instantiate a SaliencyMapMethod attack object
  jsma = SaliencyMapMethod(model, sess=sess)
  jsma_params = {'theta': 1., 'gamma': 0.1,
                 'clip_min': 0., 'clip_max': 1.,
                 'y_target': None}

  figure = None
  # Loop over the samples we want to perturb into adversarial examples
  for sample_ind in range(0, source_samples):
    print('--------------------------------------')
    print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
    sample = x_test[sample_ind:(sample_ind + 1)]

    # We want to find an adversarial example for each possible target class
    # (i.e. all classes that differ from the label given in the dataset)
    current_class = int(np.argmax(y_test[sample_ind]))
    target_classes = other_classes(nb_classes, current_class)

    # For the grid visualization, keep original images along the diagonal
    grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        sample, (img_rows, img_cols, nchannels))

    # Loop over all target classes
    for target in target_classes:
      print('Generating adv. example for target class %i' % target)

      # This call runs the Jacobian-based saliency map approach
      one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
      one_hot_target[0, target] = 1
      jsma_params['y_target'] = one_hot_target
      adv_x = jsma.generate_np(sample, **jsma_params)

      # Check if success was achieved
      res = int(model_argmax(sess, x, preds, adv_x) == target)

      # Compute the fraction of input features that were modified
      adv_x_flat = adv_x.reshape(-1)
      test_in_flat = x_test[sample_ind].reshape(-1)
      nb_changed = np.count_nonzero(adv_x_flat != test_in_flat)
      percent_perturb = float(nb_changed) / adv_x_flat.shape[0]

      # Display the original and adversarial images side-by-side
      if viz_enabled:
        figure = pair_visual(
            np.reshape(sample, (img_rows, img_cols, nchannels)),
            np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

      # Add our adversarial example to our grid data
      grid_viz_data[target, current_class, :, :, :] = np.reshape(
          adv_x, (img_rows, img_cols, nchannels))

      # Update the arrays for later analysis
      results[target, sample_ind] = res
      perturbations[target, sample_ind] = percent_perturb

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  nb_targets_tried = ((nb_classes - 1) * source_samples)
  succ_rate = float(np.sum(results)) / nb_targets_tried
  print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
  report.clean_train_adv_eval = 1. - succ_rate

  # Compute the average distortion introduced by the algorithm. The cells
  # for each sample's own class are never attacked and stay at zero, so
  # divide by the number of attacks actually run rather than by the size of
  # the whole array.
  percent_perturbed = float(np.sum(perturbations)) / nb_targets_tried
  print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

  # Compute the average distortion introduced for successful samples only:
  # average over the successful cells themselves instead of zeroing the
  # failures and dividing by every cell.
  successful = results == 1
  if successful.any():
    percent_perturb_succ = float(np.mean(perturbations[successful]))
  else:
    # No attack succeeded; report 0 rather than the mean of an empty array
    percent_perturb_succ = 0.
  print('Avg. rate of perturbed features for successful '
        'adversarial examples {0:.4f}'.format(percent_perturb_succ))

  # Close TF session
  sess.close()

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    import matplotlib.pyplot as plt
    plt.close(figure)
    _ = grid_visual(grid_viz_data)

  return report
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    Evaluate a basic MNIST CNN at a series of saved checkpoints.

    For checkpoint numbers 50, 100, ..., 1000 the function loads
    ``<save_dir>/<filename>-<number>`` when its ``.meta`` file exists and
    otherwise calls ``train_with_noise``; it then evaluates the model on the
    clean test set and prints the collected accuracies and distortions.

    The names ``file``, ``save_dir``, ``filename``, ``eps``, ``save``,
    ``retrain``, ``discretizeColor`` and ``type`` are not defined in this
    function; presumably they are module-level globals -- verify (if no
    global ``type`` exists, the builtin ``type`` is what gets passed).

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: when false, nothing is loaded, trained or evaluated
                        and an empty report is returned
    :param testing: not referenced in this function
    :param backprop_through_attack: not referenced in this function
    :param nb_filters: filter count passed to ModelBasicCNN
    :param num_threads: if truthy, the session uses one intra-op thread
                        (the numeric value itself is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    # NOTE(review): `file` is not defined in this function -- presumably a
    # module-level path; confirm.
    mnist = MNIST(path=file,
                  train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # x_train = x_train[0:1].reshape(784)
    # k = np.unique(x_train.reshape(-1, 784))
    # k = list(set(x_train.reshape(784)))
    # nk = [k.index(x_train[x]) for x in len(x_train)]
    # print(k, np.shape(k), nk)

    ###############################
    # Transform image to uniimage #
    ###############################
    # x_train = convert_uniimage(x_train)
    # x_test = transform_4_in_1(x_test)
    # trans_x_text = np.copy(x_test)
    # x_test = convert_uniimage(x_test)
    # uni_x_test = np.copy(x_test)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack settings
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    # Evaluation helper: forwards everything to model_eval, stores the
    # accuracy on the report under `report_key`, and returns
    # (accuracy, distortion). The `type` parameter shadows the builtin
    # inside this helper.
    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                ae=None,
                type=None,
                datasetName=None,
                discretizeColor=1):
        # model_eval is unpacked into two values here, unlike the
        # single-value form used by other tutorials in this file.
        accuracy, distortion = model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          x_set,
                                          y_set,
                                          args=eval_params,
                                          is_adv=is_adv,
                                          ae=ae,
                                          type=type,
                                          datasetName=datasetName,
                                          discretizeColor=discretizeColor)
        setattr(report, report_key, accuracy)
        # Only print when the caller says which kind of examples these are
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'

        if report_text:
            print('Test accuracy on %s examples: %0.4f' %
                  (report_text, accuracy))

        return accuracy, distortion

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        # Callback handed to train_with_noise. `type` and `discretizeColor`
        # are free names here (not the do_eval parameters).
        def evaluate():
            do_eval(preds,
                    x_test,
                    y_test,
                    'clean_train_clean_eval',
                    False,
                    type=type,
                    datasetName="MNIST",
                    discretizeColor=discretizeColor)

        saveFileNumArr = []
        # saveFileNumArr = [50, 500, 1000]

        # Build the checkpoint numbers 50, 100, ..., 1000
        count = 0
        appendNum = 50
        while count < 1000:
            count = count + appendNum
            saveFileNumArr.append(count)

        distortionArr = []
        accuracyArr = []
        for i in range(len(saveFileNumArr)):
            saveFileNum = saveFileNumArr[i]
            model_path = os.path.join(save_dir,
                                      filename + "-" + str(saveFileNum))
            print("Trying to load trained model from: " + model_path)
            if os.path.exists(model_path + ".meta"):
                tf_model_load(sess, model_path)
                print("Load trained model")
            else:
                # No checkpoint with this number: train instead. This branch
                # can run on every iteration that lacks a checkpoint.
                # train(sess, loss, x_train, y_train, evaluate=evaluate,
                #       args=train_params, rng=rng, var_list=model.get_params())
                train_with_noise(sess,
                                 loss,
                                 x_train,
                                 y_train,
                                 evaluate=evaluate,
                                 args=train_params,
                                 rng=rng,
                                 var_list=model.get_params(),
                                 save=save,
                                 type=type,
                                 datasetName="MNIST",
                                 retrain=retrain,
                                 discretizeColor=discretizeColor)

            # Evaluate on the clean TEST set. The report key says "train",
            # but x_test/y_test are what is passed; the same key is
            # overwritten on every iteration.
            accuracy, distortion = do_eval(preds,
                                           x_test,
                                           y_test,
                                           'train_clean_train_clean_eval',
                                           False,
                                           type=type,
                                           datasetName="MNIST",
                                           discretizeColor=discretizeColor)

            # Initialize the Fast Gradient Sign Method (FGSM) attack object and
            # graph
            # NOTE(review): this attack graph is rebuilt on every iteration,
            # but adv_x / preds_adv are not used below because the
            # adversarial evaluation is commented out.
            fgsm = FastGradientMethod(model, sess=sess)
            # fgsm = BasicIterativeMethod(model, sess=sess)
            # fgsm = MomentumIterativeMethod(model, sess=sess)
            # fgsm_params = {
            #   'clip_min': 0.,
            #   'clip_max': 1.,
            #   'verbose': False
            # }
            # fgsm = HopSkipJumpAttack(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            # adv_x = fgsm.generate_np(x, **fgsm_params)
            # adv = sess.run(adv_x, feed_dict={x: x_test})
            preds_adv = model.get_logits(adv_x)
            # print(sess.run(preds_adv, feed_dict={x: x_test}))

            #############################
            # Create adversarial images #
            #############################
            # We have to produce adversarial image 1 by 1 by using HopSkipJumpAttack
            # adv_test = []
            # for i in range(len(x_test)):
            #   tmp_adv_test = sess.run(adv_x, feed_dict={x: [x_test[i]]})
            #   adv_test.append(tmp_adv_test[0])
            #   if (i+1) % 100 == 0:
            #     print((i+1),"/",len(x_test), " adversarial images")
            #
            # adv_test = np.array(adv_test)
            # print(np.shape(adv_test))

            # Evaluate the accuracy of the MNIST model on adversarial examples
            # do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            # accuracy, distortion = do_eval(preds, x_test, y_test, 'clean_train_adv_eval', True, ae=adv_x,
            #                                type=type, datasetName="MNIST", discretizeColor=discretizeColor)
            # do_eval(preds, adv_test, y_test, 'clean_train_adv_eval', True)

            # These are the values from the clean evaluation above
            distortionArr.append(distortion)
            accuracyArr.append(accuracy)
            # print(str(accuracy))
            # print(str(distortion))

        # Print one value per checkpoint, in checkpoint order
        print("accuracy:")
        for accuracy in accuracyArr:
            print(accuracy)

        print("distortion:")
        for distortion in distortionArr:
            print(distortion)

    return report
Esempio n. 17
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED):
  """
  MNIST tutorial for Carlini and Wagner's attack, evaluated over a series
  of saved checkpoints.

  For every checkpoint number 50, 100, ..., 1000 a fresh session and model
  graph are built, the checkpoint is loaded (or the model is trained and
  saved when no checkpoint exists), a CW attack graph is created and the
  model is evaluated. The per-checkpoint accuracy and distortion values
  are printed once all checkpoints have been processed.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) when true, requires
                      source_samples == nb_classes and selects one test
                      example per class
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: maximum number of CW optimisation iterations
  :param model_path: accepted for interface compatibility; the value is
                     replaced by a per-checkpoint path inside the loop
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object (not populated by this function)
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  set_log_level(logging.DEBUG)

  # Get MNIST data.
  # NOTE(review): `file`, `save_dir`, `filename` and `discretizeColor` are not
  # defined in this function and must be provided by the enclosing module.
  mnist = MNIST(path=file, train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Checkpoint suffixes to evaluate: 50, 100, ..., 1000
  saveFileNumArr = list(range(50, 1001, 50))

  distortionArr = []
  accuracyArr = []
  for saveFileNum in saveFileNumArr:
    model_path = os.path.join(save_dir, filename + "-" + str(saveFileNum))
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session (one per checkpoint; closed at the end of the
    # iteration, before the default graph is reset)
    sess = tf.Session()
    print("Created TensorFlow session.")
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])

    print("Trying to load trained model from: " + model_path)
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
      tf_model_load(sess, model_path)
      print("Load trained model")
    else:
      train(sess, loss, x_train, y_train, args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, model_path)

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, sess=sess)

    if viz_enabled:
      assert source_samples == nb_classes
      # First test index of every class
      idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
              for i in range(nb_classes)]
    # NOTE: adv_inputs / grid_viz_data are only consumed by the disabled
    # generate_np path; they are kept so that path can be re-enabled.
    if targeted:
      if viz_enabled:
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                      nchannels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        adv_inputs = np.array(
            [[instance] * nb_classes for instance in x_test[idxs]],
            dtype=np.float32)
      else:
        adv_inputs = np.array(
            [[instance] * nb_classes for
             instance in x_test[:source_samples]], dtype=np.float32)

      # Identity matrix: one one-hot target row per class
      one_hot = np.zeros((nb_classes, nb_classes))
      one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

      adv_inputs = adv_inputs.reshape(
          (source_samples * nb_classes, img_rows, img_cols, nchannels))
      adv_ys = np.array([one_hot] * source_samples,
                        dtype=np.float32).reshape((source_samples *
                                                   nb_classes, nb_classes))
      yname = "y_target"
    else:
      if viz_enabled:
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        adv_inputs = x_test[idxs]
      else:
        adv_inputs = x_test

      adv_ys = None
      yname = "y"

    if targeted:
      cw_params_batch_size = source_samples * nb_classes
    else:
      cw_params_batch_size = source_samples
    cw_params = {'binary_search_steps': 1,
                 'max_iterations': attack_iterations,
                 'learning_rate': CW_LEARNING_RATE,
                 'batch_size': cw_params_batch_size,
                 'initial_const': 10}

    # Symbolic adversarial examples for placeholder x
    adv2 = cw.generate(x, **cw_params)
    # NOTE(review): the label entry is added after generate() and therefore
    # does not affect adv2; it was meant for the disabled generate_np call.
    cw_params[yname] = adv_ys
    adv_x = None
    # adv_x = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
      # NOTE(review): adv_x is None while generate_np is disabled, so this
      # evaluation path is presumably non-functional -- confirm.
      accuracy = model_eval(
          sess, x, y, preds, adv_x, adv_ys, args=eval_params)
      # No distortion is computed on this path; use NaN so the summary
      # below does not fail with a NameError.
      distortion = float('nan')
    else:
      # NOTE(review): `type` resolves to the builtin unless the module
      # defines its own `type` -- confirm.
      accuracy, distortion = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params, is_adv=True, ae=adv2,
                                        type=type, datasetName="MNIST", discretizeColor=discretizeColor)

    print('--------------------------------------')
    print("load save file: ", saveFileNum)
    print('Test accuracy on examples: %0.4f ,distortion: %0.4f' % (accuracy, distortion))

    distortionArr.append(distortion)
    accuracyArr.append(accuracy)

    # Close this checkpoint's session before discarding its graph: resetting
    # the default graph while a session is active is undefined behaviour,
    # and previously only the final session was ever closed.
    sess.close()
    tf.reset_default_graph()

  print("accuracy:")
  for accuracy in accuracyArr:
    print(accuracy)

  print("distortion:")
  for distortion in distortionArr:
    print(distortion)

  return report
Esempio n. 18
0
class CleanCNN:
    """CNN trained on clean examples only, then probed with FGSM.

    Holds the training / evaluation / attack hyper-parameters, the dataset
    arrays filled in by ``get_data`` and the model objects built by
    ``train``. The methods rely on module-level ``sess``, ``x``, ``train``
    and ``do_eval``.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3, clip_min=0, clip_max=1):
        # Hyper-parameter dictionaries handed to train / do_eval / the attack.
        self.train_params = dict(nb_epochs=nb_epochs,
                                 batch_size=batch_size,
                                 learning_rate=learning_rate)
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)

        # Fixed seed for the generator passed to train() as `rng`.
        self.range = np.random.RandomState([2019, 11, 25])

        # Everything below is populated later by get_data() / train().
        for attr in ('x_train', 'y_train', 'x_test', 'y_test',
                     'model', 'preds', 'loss',
                     'img_rows', 'img_cols', 'nchannels', 'nb_classes'):
            setattr(self, attr, None)

    def get_data(self):
        """Load the 'byclass' train/test samples and prepare them as float32 arrays.

        Images are scaled by 1/255 and reshaped to (N, 28, 28, 1); labels are
        passed through oneHotEncodeY with 62 classes.
        """
        class_count = 62

        def to_image_batch(raw):
            # float32 cast, scale by 1/255, add the trailing channel axis.
            scaled = raw.astype('float32') / 255.
            return np.reshape(scaled, (scaled.shape[0], 28, 28, 1))

        train_images, train_labels = extract_training_samples('byclass')
        test_images, test_labels = extract_test_samples('byclass')

        test_onehot = oneHotEncodeY(test_labels, class_count)
        train_onehot = oneHotEncodeY(train_labels, class_count)

        self.y_train = train_onehot.astype('float32')
        self.y_test = test_onehot.astype('float32')
        self.x_train = to_image_batch(train_images)
        self.x_test = to_image_batch(test_images)

        image_dims = self.x_train.shape[1:4]
        self.img_rows, self.img_cols, self.nchannels = image_dims
        self.nb_classes = class_count

    def train(self, nb_filters, label_smoothing):
        """Build the 'model1' CNN and train it on the clean training set."""
        network = ModelBasicCNN('model1', self.nb_classes, nb_filters)
        self.model = network
        self.preds = network.get_logits(x)
        self.loss = CrossEntropy(network, smoothing=label_smoothing)

        fit_options = dict(evaluate=self.evaluate,
                           args=self.train_params,
                           rng=self.range,
                           var_list=network.get_params())
        train(sess, self.loss, self.x_train, self.y_train, **fit_options)

    def evaluate(self):
        """Evaluate the clean predictions on the test set."""
        do_eval(self.preds, self.eval_params,
                self.x_test, self.y_test,
                'clean_train_clean_eval', False)

    def test(self):
        """Evaluate the clean predictions on the training set."""
        do_eval(self.preds, self.eval_params,
                self.x_train, self.y_train,
                'train_clean_train_clean_eval')

    def adverserial_testing(self):
        """Attack the trained model with FGSM and evaluate on test and train sets."""
        attacker = FastGradientMethod(self.model, sess=sess)
        perturbed = attacker.generate(x, **self.fgsm_params)
        adv_logits = self.model.get_logits(perturbed)

        # Test-set evaluation first, then the training set.
        do_eval(adv_logits, self.eval_params,
                self.x_test, self.y_test,
                'clean_train_adv_eval', True)
        do_eval(adv_logits, self.eval_params,
                self.x_train, self.y_train,
                'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')
Esempio n. 19
0
class AdverseCNN:
    """CNN trained with an FGSM attack plugged into its cross-entropy loss.

    Holds the training / evaluation / attack hyper-parameters, the dataset
    arrays filled in by ``get_data`` and the model objects built by
    ``adverse_train``. The methods rely on module-level ``sess``, ``x``,
    ``train`` and ``do_eval``.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3, clip_min=0, clip_max=1):
        # Hyper-parameter dictionaries handed to train / do_eval / the attack.
        self.train_params = dict(nb_epochs=nb_epochs,
                                 batch_size=batch_size,
                                 learning_rate=learning_rate)
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)

        # Fixed seed for the generator passed to train() as `rng`.
        self.range = np.random.RandomState([2019, 11, 25])

        # Everything below is populated later by get_data() / adverse_train().
        for attr in ('x_train', 'y_train', 'x_test', 'y_test',
                     'model', 'preds', 'loss',
                     'img_rows', 'img_cols', 'nchannels', 'nb_classes',
                     'preds_adv'):
            setattr(self, attr, None)

    def get_data(self):
        """Load the 'byclass' train/test samples and prepare them as float32 arrays.

        Images are scaled by 1/255 and reshaped to (N, 28, 28, 1); labels are
        passed through oneHotEncodeY with 62 classes. Prints the label
        maximum, the pre-reshape array shapes and a separator line.
        """
        class_count = 62

        train_images, train_labels = extract_training_samples('byclass')
        test_images, test_labels = extract_test_samples('byclass')

        test_onehot = oneHotEncodeY(test_labels, class_count)
        train_onehot = oneHotEncodeY(train_labels, class_count)

        train_images = train_images.astype('float32')
        self.y_train = train_onehot.astype('float32')
        test_images = test_images.astype('float32')
        self.y_test = test_onehot.astype('float32')

        # Debug output: shapes are reported before the channel axis is added.
        print(np.amax(self.y_train))
        print(train_images.shape, self.y_train.shape, test_images.shape, self.y_test.shape)

        train_images = train_images / 255.
        test_images = test_images / 255.

        self.x_train = np.reshape(train_images, (train_images.shape[0], 28, 28, 1))
        self.x_test = np.reshape(test_images, (test_images.shape[0], 28, 28, 1))

        image_dims = self.x_train.shape[1:4]
        self.img_rows, self.img_cols, self.nchannels = image_dims

        print("//////////////////////////////")
        self.nb_classes = class_count

    def adverse_train(self, nb_filters, label_smoothing):
        """Build the 'model2' CNN and train it with the FGSM attack in the loss."""
        network = ModelBasicCNN('model2', self.nb_classes, nb_filters)
        self.model = network
        attacker = FastGradientMethod(network, sess=sess)

        def attack(x):
            # Reads self.fgsm_params at call time, like the loss expects.
            return attacker.generate(x, **self.fgsm_params)

        self.preds = network.get_logits(x)
        self.loss = CrossEntropy(network, smoothing=label_smoothing, attack=attack)

        perturbed = attack(x)
        self.preds_adv = network.get_logits(perturbed)

        fit_options = dict(evaluate=self.evaluate,
                           args=self.train_params,
                           rng=self.range,
                           var_list=network.get_params())
        train(sess, self.loss, self.x_train, self.y_train, **fit_options)

    def evaluate(self):
        """Evaluate clean and adversarial predictions on the test set."""
        held_out = (self.x_test, self.y_test)
        do_eval(self.preds, self.eval_params, held_out[0], held_out[1],
                'adv_train_clean_eval', False)
        do_eval(self.preds_adv, self.eval_params, held_out[0], held_out[1],
                'adv_train_adv_eval', True)

    def test(self):
        """Evaluate clean and adversarial predictions on the training set."""
        seen = (self.x_train, self.y_train)
        do_eval(self.preds, self.eval_params, seen[0], seen[1],
                'train_adv_train_clean_eval')
        do_eval(self.preds_adv, self.eval_params, seen[0], seen[1],
                'train_adv_train_adv_eval')
Esempio n. 20
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial.

    Optionally trains a CNN on clean data and evaluates it against FGSM,
    then always trains a second CNN with FGSM adversarial training and
    evaluates it on clean and adversarial inputs.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count passed to ModelBasicCNN
    :param num_threads: only tested for truthiness; any truthy value makes
                        the session use intra_op_parallelism_threads=1
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Accumulates the accuracies measured below; returned to the caller.
    report = AccuracyReport()

    # Seed TF for better reproducibility and enable debug logging.
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Session, optionally restricted to one intra-op thread.
    session_options = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # Load MNIST and read image / label dimensions from the arrays.
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Input and label placeholders.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Shared hyper-parameters.
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    eval_params = dict(batch_size=batch_size)
    fgsm_params = dict(eps=0.3, clip_min=0., clip_max=1.)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Record the accuracy on the report; print it only when the caller
        # says whether the inputs are adversarial or legitimate.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        report_text = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        clean_model = ModelBasicCNN('model1', nb_classes, nb_filters)
        clean_logits = clean_model.get_logits(x)
        clean_loss = CrossEntropy(clean_model, smoothing=label_smoothing)

        def evaluate():
            do_eval(clean_logits, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, clean_loss, x_train, y_train,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=clean_model.get_params())

        # Training-set accuracy of the clean model.
        if testing:
            do_eval(clean_logits, x_train, y_train, 'train_clean_train_clean_eval')

        # Build the FGSM attack graph against the clean model.
        clean_fgsm = FastGradientMethod(clean_model, sess=sess)
        clean_adv_inputs = clean_fgsm.generate(x, **fgsm_params)
        clean_adv_logits = clean_model.get_logits(clean_adv_inputs)

        # Accuracy of the clean model on adversarial test inputs.
        do_eval(clean_adv_logits, x_test, y_test, 'clean_train_adv_eval', True)

        # Same measurement on the training set.
        if testing:
            do_eval(clean_adv_logits, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Second model: trained to be robust to FastGradientMethod.
    robust_model = ModelBasicCNN('model2', nb_classes, nb_filters)
    robust_fgsm = FastGradientMethod(robust_model, sess=sess)

    def attack(x):
        return robust_fgsm.generate(x, **fgsm_params)

    robust_loss = CrossEntropy(robust_model, smoothing=label_smoothing, attack=attack)
    robust_logits = robust_model.get_logits(x)
    robust_adv_inputs = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        robust_adv_inputs = tf.stop_gradient(robust_adv_inputs)
    robust_adv_logits = robust_model.get_logits(robust_adv_inputs)

    def evaluate2():
        # Adversarially trained model on legitimate, then adversarial inputs.
        do_eval(robust_logits, x_test, y_test, 'adv_train_clean_eval', False)
        do_eval(robust_adv_logits, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training.
    train(sess, robust_loss, x_train, y_train,
          evaluate=evaluate2, args=train_params, rng=rng,
          var_list=robust_model.get_params())

    # Training-set accuracies of the adversarially trained model.
    if testing:
        do_eval(robust_logits, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(robust_adv_logits, x_train, y_train, 'train_adv_train_adv_eval')

    return report
Esempio n. 21
0
def fashion_tutorial_jsma(train_start=0,
                          train_end=60000,
                          test_start=0,
                          test_end=10000,
                          viz_enabled=VIZ_ENABLED,
                          nb_epochs=NB_EPOCHS,
                          batch_size=BATCH_SIZE,
                          source_samples=SOURCE_SAMPLES,
                          learning_rate=LEARNING_RATE,
                          model_path=MODEL_PATH,
                          noise_output=NOISE_OUTPUT):
    """
    Fashion MNIST tutorial for the Jacobian-based saliency map approach (JSMA)

    Trains a CNN on Fashion MNIST, runs a targeted JSMA attack on the first
    ``source_samples`` test inputs for every other class, prints success and
    perturbation statistics and optionally saves a grid image.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) save a grid image of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file; only its last component is
                       used, as the 'filename' training parameter
    :param noise_output: if true, the grid shows the perturbation
                         (adversarial minus original) instead of the
                         adversarial image itself
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get Fashion MNIST data
    fashion = keras.datasets.fashion_mnist
    (x_train, y_train), (x_test, y_test) = fashion.load_data()

    # Honour the documented start/end indices (previously ignored; with the
    # default arguments the slices cover the full dataset).
    x_train = x_train[train_start:train_end]
    y_train = y_train[train_start:train_end]
    x_test = x_test[test_start:test_end]
    y_test = y_test[test_start:test_end]

    # Add the channel axis, one-hot encode labels, scale pixels by 1/255
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train a Fashion MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the Fashion MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    # One slot per source class; each slot keeps the most recent
    # adversarial example / original sample seen for that class.
    adv_all = np.zeros((nb_classes, img_rows, img_cols, nchannels), dtype='f')
    sample_all = np.zeros((nb_classes, img_rows, img_cols, nchannels),
                          dtype='f')

    # Loop over the samples we want to perturb into adversarial examples
    # (`range` instead of `xrange`, which is undefined on Python 3)
    for sample_ind in range(source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            adv_all[current_class] = adv_x
            sample_all[current_class] = sample

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x_reshape.shape[0]

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only.
    # Average over the successful entries themselves; multiplying by the
    # mask and averaging over every entry (as before) dilutes the value
    # with zeros from failed attempts.
    success_mask = results == 1
    if success_mask.any():
        percent_perturb_succ = np.mean(perturbations[success_mask])
    else:
        percent_perturb_succ = 0.0
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Compute the average L2 distortion introduced by the algorithm
    l2_norm = np.mean(np.sum((adv_all - sample_all)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_norm))

    # Fill the single-row grid with either the perturbation or the
    # adversarial image of each class
    for i in range(nb_classes):
        if noise_output:
            image = adv_all[i] - sample_all[i]
        else:
            image = adv_all[i]
        grid_viz_data[i, 0] = image

    # Close TF session
    sess.close()

    def save_visual(data, path):
        """
        Modified version of cleverhans.plot.pyplot: draw the image grid
        `data` (indexed [column, row, height, width, channel]) and save it
        to `path`.
        """
        import matplotlib.pyplot as plt

        figure = plt.figure()

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for row in range(num_rows):
            for col in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (col + 1) + (row * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[col, row, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[col, row, :, :, :])

        # Write the plot to disk and release the figure
        plt.savefig(path)
        plt.close(figure)

    # Finally, save a grid of all the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/jsma_fashion_mnist_noise.png"
        else:
            image_name = "output/jsma_fashion_mnist.png"
        save_visual(grid_viz_data, image_name)

    return report