Example #1
class SaliencyMapAttack(AdversarialAttack):
    def __init__(self,
                 model,
                 theta=1.0,
                 gamma=1.0,
                 clip_min=-1.0,
                 clip_max=1.0,
                 targeted=False,
                 symbolic_impl=True):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._theta = theta
        self._gamma = gamma
        self._symbolic_impl = symbolic_impl

        with self.graph.as_default():
            self._method = SaliencyMapMethod(self._model,
                                             sess=self.session,
                                             theta=self._theta,
                                             gamma=self._gamma,
                                             nb_classes=self._n_classes,
                                             clip_min=self._clip_min,
                                             clip_max=self._clip_max,
                                             symbolic_impl=self._symbolic_impl)

    def attack_method(self, labels):
        if self._targeted:
            return self._method.generate(x=self._x_clean, y_target=labels)
        return self._method.generate(x=self._x_clean)
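In the targeted branch above, `labels` is passed straight through as `y_target`, which cleverhans expects to be a one-hot batch. A minimal sketch of building such a batch (the helper name and the shapes are illustrative, not part of the wrapper):

import numpy as np

def one_hot_targets(target_class, batch_size, n_classes):
    # One row per example, a single 1.0 in the target column.
    targets = np.zeros((batch_size, n_classes), dtype=np.float32)
    targets[:, target_class] = 1.0
    return targets

# e.g. push a batch of 8 ten-class inputs towards class 3
labels = one_hot_targets(target_class=3, batch_size=8, n_classes=10)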
Example #2
def main(_):
  # Images for inception classifier are normalized to be in [-1, 1] interval,
  # eps is a difference between pixels so it should be in [0, 2] interval.
  # Renormalizing epsilon from [0, 255] to [0, 2].
  eps = 2.0 * FLAGS.max_epsilon / 255.0
  batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
  num_classes = 1001
  BUILD_MODEL = True

  from cleverhans.attacks_tf import jacobian_graph, jsma_batch

  tf.logging.set_verbosity(tf.logging.INFO)

  with tf.Graph().as_default() as d_graph:
    # Prepare graph

    x_input = tf.placeholder(tf.float32, shape=batch_shape)
    model = InceptionModel(num_classes)

    preds = model(x_input)
    #grads = jacobian_graph(preds, x_input, num_classes)
    saver = tf.train.Saver(slim.get_model_variables())

    # Run computation
    with tf.Session() as sess:
      #print("Session is closed:",sess._is_closed())
      saver.restore(sess, FLAGS.checkpoint_path)

      salmap = SaliencyMapMethod(model, sess=sess)
      # JSMA is parameterized by theta/gamma rather than an eps budget,
      # so the eps value computed above is not passed to generate().
      x_adv = salmap.generate(x_input, clip_min=-1., clip_max=1.)

      for filenames, images in load_images(FLAGS.input_dir, batch_shape):

        adv_images = sess.run(x_adv, feed_dict={x_input: images})
        save_images(adv_images, filenames, FLAGS.output_dir)
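The epsilon rescaling commented above maps a pixel-space budget in [0, 255] onto the [-1, 1] range used by the Inception preprocessing, where the full dynamic range is 2.0. A quick check of the arithmetic (16 is just an illustrative flag value, not taken from the example):

max_epsilon = 16                  # assumed flag value, in 0-255 pixel units
eps = 2.0 * max_epsilon / 255.0   # same rescaling as in the snippet above
print(eps)                        # ~0.1255, i.e. 16/255 of the full range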
Example #3
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)

        model = InceptionModel(num_classes)

        salmap = SaliencyMapMethod(model)

        noisy_images = x_input + 0.05 * tf.sign(tf.random_normal(batch_shape))
        x_adv = salmap.generate(noisy_images, clip_min=-1., clip_max=1.)
        #x_adv = x_input + tf.clip_by_value(x_adv - x_input, -eps, eps)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)
Example #4
def _get_or_create_jsma(sess, x, classifier, in_x, save_file):
    if not os.path.exists(save_file):
        jsma = SaliencyMapMethod(classifier)
        adv = jsma.generate(x, clip_min=0., clip_max=1., gamma=FLAGS.gamma)
        # Any tail of in_x beyond a multiple of 10000 stays zero below.
        jsma_adv = np.zeros(in_x.shape)
        for i in range(in_x.shape[0] // 10000):
            jsma_adv[i * 10000:(i + 1) * 10000] = adv.eval(
                feed_dict={x: in_x[i * 10000:(i + 1) * 10000]})
            print(f'done computing jsma axs for first {(i+1)*10000}')
        np.save(save_file, jsma_adv)
        return jsma_adv
    return np.load(save_file)
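A hedged usage sketch for the caching helper above, with illustrative names; it assumes the caller's graph defines `classifier` and the placeholder `x`, and it installs the session as the default one so that `adv.eval` can run:

# Illustrative only: `classifier`, `x`, and `x_test` come from the caller's graph.
with sess.as_default():
    x_test_adv = _get_or_create_jsma(sess, x, classifier, x_test,
                                     save_file='jsma_test_adv.npy')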
Example #5
    def adv_jsma_train(self, epoch=5000):
        print('training {} for epoch={}'.format(self.MODEL_NAME, epoch))
        params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        x, y = self.cnn.make_inputs()
        probs = self.cnn.make_model(x)

        self.cnn.start_session()
        self.cnn.init_session_and_restore()

        jsma = SaliencyMapMethod(self.cnn, sess=self.cnn.sess)
        adv_x = jsma.generate(x, **params)
        adv_probs = self.cnn.make_model(adv_x)

        self.cnn.train(probs, x, y, epoch, self.dataset, adv_preds=adv_probs)
        # cnn.test(gtsrb)
        self.cnn.end_session()
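The `adv_preds` argument above suggests the usual adversarial-training recipe of mixing clean and adversarial cross-entropy in one objective. A minimal TF 1.x sketch of that loss combination (all names and the 0.5 weight are illustrative, not taken from this codebase):

import tensorflow as tf

def mixed_adversarial_loss(y, clean_logits, adv_logits, adv_weight=0.5):
    # Weighted average of clean and adversarial cross-entropy.
    clean_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=clean_logits))
    adv_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=adv_logits))
    return (1.0 - adv_weight) * clean_loss + adv_weight * adv_loss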
Example #6
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
  """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {'allow_soft_placement': True}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Get MNIST data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}
  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }
  jsma_params = {'theta': 1., 'gamma': 0.1,
                 'clip_min': 0., 'clip_max': 1.,
                 'y_target': None}
  rng = np.random.RandomState([2017, 8, 30])

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  if clean_train:
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
      do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng, var_list=model.get_params())

    # Calculate training error
    if testing:
      do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    # Initialize the Saliency Map Method (JSMA) attack object and
    # graph
    jsma = SaliencyMapMethod(model, sess=sess)
    adv_jsma_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_logits(adv_jsma_x)

    # Generate JSMA adversarial examples and save rotated copies to disk
    out_dir = 'images/jsma_rotated_adv/'
    os.makedirs(out_dir + 'train/', exist_ok=True)
    os.makedirs(out_dir + 'test/', exist_ok=True)
    for index in range(len(y_test)):
        print('test ' + str(index))
        x_ = x_test[index]
        label = np.argmax(y_test[index])
        adv = jsma.generate_np(x_.reshape((1, 28, 28, 1)), **jsma_params)
        raw_data = (adv.reshape((28, 28)) * 255).astype('uint8')
        im = Image.fromarray(raw_data, mode='P')
        rot = im.rotate(30)
        rot.save(out_dir + 'test/' + str(label) + '_' + str(uuid.uuid4()) + '.png')
    for index in range(len(y_train)):
        print('train ' + str(index))
        x_ = x_train[index]
        label = np.argmax(y_train[index])
        adv = jsma.generate_np(x_.reshape((1, 28, 28, 1)), **jsma_params)
        raw_data = (adv.reshape((28, 28)) * 255).astype('uint8')
        im = Image.fromarray(raw_data, mode='P')
        rot = im.rotate(30)
        rot.save(out_dir + 'train/' + str(label) + '_' + str(uuid.uuid4()) + '.png')



  return report
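For intuition on the `jsma_params` used throughout these MNIST examples: `gamma` bounds the fraction of input features the attack may modify, and `theta` is the amount each selected feature is pushed (towards `clip_max` for positive `theta`). In the cleverhans implementation features are changed in pairs, so on 28x28x1 inputs `gamma=0.1` allows roughly:

import numpy as np

n_features = 28 * 28 * 1
gamma = 0.1
print(int(np.floor(n_features * gamma)))  # about 78 pixels may be perturbed, each by theta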
Example #7
def baseline_jsma(train_start=0, train_end=60000, test_start=0,
                  test_end=10000, nb_epochs=6, batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    # assert Y_train.shape[1] == 10
    # label_smooth = .1
    # Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        # Train the baseline model on clean data; adversarial training
        # further below uses a freshly defined model (model_2).
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the JSMA attack object and
        # graph
        jsma = SaliencyMapMethod(model, sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds_adv, X_train,
                             Y_train, args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    jsma2 = SaliencyMapMethod(model_2, sess=sess)
    adv_x_2 = jsma2.generate(x, **jsma_params)
    preds_2_adv = model_2(adv_x_2)

    #
    # let's generate FGSM examples for model_2
    #
    fgsm = FastGradientMethod(model_2, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_2_fgsm = model_2(adv_x_fgsm)

    # DON'T WANT TO TRAIN on FGSM adv examples yet

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on JSMA adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

        # Accuracy of the JSMA adv trained model on FGSM adv examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2_fgsm, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
Example #8
def eval(sess,
         model_name,
         X_train,
         Y_train,
         X_test,
         Y_test,
         cnn=False,
         rbf=False,
         fgsm=False,
         jsma=False,
         df=False,
         bim=False):
    """ Load model saved in model_name.json and model_name_weights.h5 and 
    evaluate its accuracy on legitimate test samples and adversarial samples.
    Use cnn=True if the model is CNN based.
    """

    # open text file and output accuracy results to it
    text_file = open("cifar_results.txt", "w")

    # load saved model
    print("Load model ... ")
    '''
    json = open('models/{}.json'.format(model_name), 'r')
    model = json.read()
    json.close()
    loaded_model = model_from_json(model)
    loaded_model.load_weights("models/{}_weights.h5".format(model_name))
    '''
    if rbf:
        loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                  custom_objects={'RBFLayer': RBFLayer})
        text_file.write('Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
    else:
        loaded_model = load_model("models/{}.h5".format(model_name))
        text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name))

    # Set placeholders
    if cnn:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 3072))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = loaded_model(x)

    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args={"batch_size": 128})
    text_file.write('Test accuracy on legitimate test examples: {0}\n'.format(
        str(accuracy)))
    #print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples depending on the input parameters
    wrap = KerasModelWrapper(loaded_model)

    # FGSM
    if fgsm:
        fgsm = FastGradientMethod(wrap, sess=sess)
        fgsm_params = {'eps': 0.3}
        adv_x = fgsm.generate(x, **fgsm_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on fgsm adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on fgsm adversarial test examples: ' + str(accuracy))

    # JSMA
    if jsma:
        jsma = SaliencyMapMethod(wrap, sess=sess)
        jsma_params = {
            'theta': 2.,
            'gamma': 0.145,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        adv_x = jsma.generate(x, **jsma_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on jsma adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on jsma adversarial test examples: ' + str(accuracy))

    # DeepFool
    if df:
        df = DeepFool(wrap, sess=sess)
        df_params = {'nb_candidate': 10, 'max_iter': 50}
        adv_x = df.generate(x, **df_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on df adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on df adversarial test examples: ' + str(accuracy))

    # Basic Iterative Method
    # Commented out as it is hanging on batch #0 at the moment
    '''
    if bim:
        bim = ProjectedGradientDescent(wrap, sess=sess)
        bim_params = {'eps': 0.3}
        adv_x = bim.generate(x, **bim_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={ "batch_size" : 128})
        text_file.write('Test accuracy on bim adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on bim adversarial test examples: ' + str(accuracy))
    '''
    print('Accuracy results written to cifar_results.txt')
    text_file.close()

    # Close TF session
    sess.close()
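`tf.stop_gradient` above detaches the adversarial examples from the attack graph, so anything differentiated later treats `adv_x` as a constant input. A tiny TF 1.x illustration of the effect:

import tensorflow as tf

a = tf.constant(3.0)
b = tf.stop_gradient(a * 2.0)     # forward value is 6.0, but gradients stop here
print(tf.gradients(b * 5.0, a))   # [None]: no gradient flows back to `a`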
Example #9
def main(_):
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Images for inception classifier are normalized to be in [-1, 1] interval,
    num_classes = 1001
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]

    # Load ImageNet Class Labels
    with open('labels.json') as f:
        labels = json.load(f)

    # Prepare Graph
    with tf.Graph().as_default():

        # Build Model
        if FLAGS.model_arch.lower() == 'resnet_v2_101':
            model = models.Resnet_V2_101_Model(num_classes)
            exceptions = []

        elif FLAGS.model_arch.lower() == 'inception_v3':
            model = models.Inception_V3_Model(num_classes)
            exceptions = ['InceptionV3/AuxLogits.*']

        else:
            raise ValueError('Invalid model architecture specified: {}'.format(
                FLAGS.model_arch))

        # Define Model Variables
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        FastGradientMethod(model).generate(x_input)
        model_variables = tf.contrib.framework.filter_variables(
            slim.get_model_variables(), exclude_patterns=exceptions)

        # Load Session
        saver = tf.train.Saver(model_variables)
        with tf.train.SessionManager().prepare_session(
                master=FLAGS.master,
                checkpoint_filename_with_path=FLAGS.checkpoint_path,
                saver=saver) as sess:

            # For Targeted Attacks
            target_idx = 0  # This will vary
            # One-hot targets for the targeted attacks below (e.g. LBFGS)
            target = tf.one_hot([target_idx] * FLAGS.batch_size, num_classes)

            # Build Attack
            if FLAGS.attack_type.lower() == 'fgsm':
                fgsm_opts = {
                    'eps': 0.3,
                    'clip_min': 0,
                    'clip_max': 1.,
                    'y_target': None
                }
                fgsm = FastGradientMethod(model)
                x_adv = fgsm.generate(x_input, **fgsm_opts)

            elif FLAGS.attack_type.lower() == 'bim':
                bim_opts = {
                    'eps': 0.3,
                    'clip_min': 0.,
                    'clip_max': 1.,
                    'y_target': None
                }
                bim = BasicIterativeMethod(model)
                x_adv = bim.generate(x_input, **bim_opts)

            elif FLAGS.attack_type.lower() == 'mim':
                mim_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                mim = MomentumIterativeMethod(model)
                x_adv = mim.generate(x_input, **mim_opts)

            elif FLAGS.attack_type.lower() == 'pgd':
                pgd_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                pgd = MadryEtAl(model)
                x_adv = pgd.generate(x_input, **pgd_opts)

            # JSMA (previously marked broken: the clip option keys must use
            # underscores; note also that the full class Jacobian is very
            # expensive with 1001 ImageNet classes)
            elif FLAGS.attack_type.lower() == 'jsma':
                jsma_opts = {
                    'theta': 1.,
                    'gamma': 0.1,
                    'clip_min': 0.,
                    'clip_max': 1.,
                    'y_target': None
                }
                jsma = SaliencyMapMethod(model)
                x_adv = jsma.generate(x_input, **jsma_opts)

            elif FLAGS.attack_type.lower() == 'lbfgs':
                lbfgs_opts = {'y_target': target}
                lbfgs = LBFGS(model)
                x_adv = lbfgs.generate(x_input, **lbfgs_opts)

            else:
                raise ValueError('Invalid attack type specified: {}'.format(
                    FLAGS.attack_type))

            start_time, batch_time, num_processed = time.time(), time.time(), 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)

                if FLAGS.show_predictions:
                    preds = sess.run(model(np.float32(images)))
                    probs = np.amax(preds, axis=1)
                    classes = np.argmax(preds, axis=1)
                    adv_preds = sess.run(model(adv_images))
                    adv_probs = np.amax(adv_preds, axis=1)
                    adv_classes = np.argmax(adv_preds, axis=1)

                    for i, _ in enumerate(filenames):
                        print('\nOriginal: {:.2f}% ({})\nAdversarial: {:.2f}% ({})'.format( \
                          probs[i]*100, labels[str(classes[i])], adv_probs[i]*100, labels[str(adv_classes[i])]))

                time_delta = time.time() - batch_time
                batch_time = time.time()
                num_processed += len(filenames)
                print('[SPEED ESTIMATION] BatchRate={:.4f} Hz; AverageRate={:.4f} Hz'.format( \
                  (len(filenames) / time_delta * 1.0), ((num_processed * 1.0) / (batch_time - start_time))))
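The if/elif chain above maps a flag value to an attack class plus its options. The same wiring is sometimes easier to maintain as a lookup table; a sketch under the assumption that the cleverhans classes used above are importable (LBFGS is omitted because it needs a `y_target`):

# Purely illustrative refactor of the dispatch above.
ATTACKS = {
    'fgsm': (FastGradientMethod, {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
    'bim':  (BasicIterativeMethod, {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
    'mim':  (MomentumIterativeMethod, {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
    'pgd':  (MadryEtAl, {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
    'jsma': (SaliencyMapMethod, {'theta': 1., 'gamma': 0.1,
                                 'clip_min': 0., 'clip_max': 1.}),
}

def build_attack(name, model, x_input):
    try:
        attack_cls, opts = ATTACKS[name.lower()]
    except KeyError:
        raise ValueError('Invalid attack type specified: {}'.format(name))
    return attack_cls(model).generate(x_input, **opts)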
Example #10
mnist = MnistProvider()
# mnist.dump_images()

cnn = CNNModel(
    image_size=mnist.IMAGE_SIZE,
    classes=mnist.CLASSES,
    model_name='mnist-28x28.1',
    model_dir='tmp/mnist_model-28x28.1',
    conv_layers=[32, 64],
    fc_layer=1024,
)

x, y = cnn.make_inputs()

probs = cnn.make_model(x)
cnn.start_session()

# jsma_params was not defined in this snippet; the values below mirror the
# other examples in this collection.
jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.}

jsma = SaliencyMapMethod(cnn, sess=cnn.sess)
adv_x = jsma.generate(x, **jsma_params)
probs = cnn.make_model(adv_x)

cnn.adv_test(probs, x, y, adv_x, mnist.test_data(size=100))
# cnn.test(mnist)
cnn.end_session()

#cnn.test(2000, mnist)

# for i in range(100):
#     data, label = gtsrb.next_batch('test')
#     print(data, label)
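Most snippets in this collection build a symbolic attack graph with `generate` and then push batches through `sess.run`, while Example #6 also uses `generate_np`, which takes and returns NumPy arrays directly. A side-by-side sketch, assuming `model`, `x`, `sess`, and a NumPy batch `images` already exist as in the examples above:

# Symbolic style: build the graph once, run it per batch.
jsma = SaliencyMapMethod(model, sess=sess)
adv_x = jsma.generate(x, theta=1., gamma=0.1, clip_min=0., clip_max=1.)
adv_images = sess.run(adv_x, feed_dict={x: images})

# NumPy style: generate_np wires up any needed placeholders internally.
adv_images = jsma.generate_np(images, theta=1., gamma=0.1,
                              clip_min=0., clip_max=1.)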
Example #11
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    print('this line #1')
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)
    print('this line #2')
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)

        model = InceptionModel(num_classes)
        print('this line #3')

        # # FGSM
        # fgsm = FastGradientMethod(model)
        # x_adv = fgsm.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

        # fgsm = FastGradientMethod(model, sess=sess)
        # fgsm_params = {'eps': 0.3,'clip_min': 0.,'clip_max': 1.}
        # adv_x = fgsm.generate(x, **fgsm_params)

        # (train_start=0, train_end=60000, test_start=0, test_end=10000, viz_enabled=True, nb_epochs=6, batch_size=128, nb_classes=10, source_samples=10, learning_rate=0.001, attack_iterations=100, model_path=os.path.join("models", "mnist"), targeted=True)

        # source_samples=10
        # viz_enabled=True
        # targeted=True
        # X_test=x_input
        # if viz_enabled:
        #     assert source_samples == nb_classes
        #     idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0]
        #             for i in range(nb_classes)]
        # if targeted:
        #     if viz_enabled:
        #         # Initialize our array for grid visualization
        #         grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
        #         grid_viz_data = np.zeros(grid_shape, dtype='f')

        #         adv_inputs = np.array(
        #             [[instance] * nb_classes for instance in X_test[idxs]],
        #             dtype=np.float32)
        #     else:
        #         adv_inputs = np.array(
        #             [[instance] * nb_classes for
        #              instance in X_test[:source_samples]], dtype=np.float32)

        #     one_hot = np.zeros((nb_classes, nb_classes))
        #     one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        #     adv_inputs = adv_inputs.reshape(
        #         (source_samples * nb_classes, img_rows, img_cols, 1))
        #     adv_ys = np.array([one_hot] * source_samples,
        #                       dtype=np.float32).reshape((source_samples *
        #                                                  nb_classes, nb_classes))
        #     yname = "y_target"
        # else:
        #     if viz_enabled:
        #         # Initialize our array for grid visualization
        #         grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
        #         grid_viz_data = np.zeros(grid_shape, dtype='f')

        #         adv_inputs = X_test[idxs]
        #     else:
        #         adv_inputs = X_test[:source_samples]

        #     adv_ys = None
        #     yname = "y"
        # # adv_ys = None
        # # yname = "y"
        # sess = tf.Session()
        # cw = CarliniWagnerL2(model, back='tf', sess=sess)
        # cw_params = {'binary_search_steps': 1, yname: adv_ys, 'max_iterations': 1000, 'learning_rate': 5e-3, 'batch_size': 1, 'initial_const': 1e-2}
        # # cw_params = {'binary_search_steps': 1, 'y': None, 'max_iterations': 1000, 'learning_rate': 5e-3, 'batch_size': 1, 'initial_const': 1e-2}
        # # cw_params = {'binary_search_steps': 1, 'y': None, 'max_iterations': 1000, 'learning_rate': 5e-3, 'batch_size': 1, 'initial_const': 1e-2}
        # # batch_size=1, confidence=0, targeted=True, learning_rate=5e-3, binary_search_steps=5, max_iterations=1000, abort_early=True, initial_const=1e-2, clip_min=0, clip_max=1, num_labels=3, shape=x_input.get_shape().as_list()[1:])
        # # x_adv = cw.generate_np(x_input,**cw_params)
        # x_adv = cw.generate(x_input,**cw_params)

        # # BIM
        # bim = BasicIterativeMethod(model)
        # x_adv = bim.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

        # # VAT
        # vat = VirtualAdversarialMethod(model)
        # x_adv = vat.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

        # # PGD
        # madry = MadryEtAl(model)
        # elastic_params = {'clip_min':-1., 'clip_max':1.}
        # x_adv = madry.generate(x_input, clip_min=-1., clip_max=1.)#, **elastic_params)

        # FFA
        # ffa = FastFeatureAdversaries(model)
        # x_adv = ffa.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

        # # JSMA
        # sess = tf.Session()
        jsma = SaliencyMapMethod(model)
        # # target = np.zeros((1,1000),dtype=np.float32)
        # # target[0,50] = 1                    #here, we suppose that the target label is 50
        # # jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': target}
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': -1.,
            'clip_max': 1.
        }
        x_adv = jsma.generate(x_input, **jsma_params)
        # sess = tf.Session()
        # sess.run(tf.global_variables_initializer())

        # jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        # jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.}
        # x_adv = jsma.generate(x_input,**jsma_params)

        # self.structural_kwargs = ['over_shoot', 'max_iter', 'clip_max',
        #                                   'clip_min', 'nb_candidate']

        #                                   self.feedable_kwargs = {'eps': np.float32,
        #                                 'eps_iter': np.float32,
        #                                 'y': np.float32,
        #                                 'y_target': np.float32,
        #                                 'clip_min': np.float32,
        #                                 'clip_max': np.float32}
        #         self.structural_kwargs = ['ord', 'nb_iter']

        # # DeepFool
        # deepfool = DeepFool(model)
        # # # deepfool_params =  {'over_shoot', 'max_iter':1000, 'clip_max':1., 'clip_min':0., 'nb_candidate}
        # deepfool_params =  {'max_iter':10, 'clip_max':1., 'clip_min':-1.}
        # x_adv = deepfool.generate(x_input,**deepfool_params)

        # LBFGS
        # lbfgs = LBFGS(model)
        # lbfgs_params = {'clip_max':1., 'clip_min':-1.}
        # x_adv = lbfgs.generate(x_input, y_target=None, **lbfgs_params)

        # ENM
        # enm = ElasticNetMethod(model)
        # enm_params = {'clip_max':1., 'clip_min':-1.}
        # x_adv = enm.generate(x_input, **enm_params)

        # # sess = tf.Session()
        # # with tf.Session() as sess:
        # # # # # CarliniWagner L2
        # # # # sess = tf.train.MonitoredSession()
        # # # sess = tf.Session()
        # cwl2 = CarliniWagnerL2(model, back='tf', sess=sess)
        # # cwl2 = CarliniWagnerL2(model, back='tf')
        # # # cwl2_params = {'batch_size':9, 'confidence':0, 'max_iterations':1000, 'clip_min':0., 'clip_max':1.}
        # cwl2_params = {'clip_min':-1.0, 'clip_max':1.0}
        # # cwl2_params = {'batch_size':9, 'confidence':0,'learning_rate':1e-2,'binary_search_steps':9, 'max_iterations':1000,'abort_early':True, 'initial_const': 1e-3,'clip_min': 0.0, 'clip_max': 1.0}
        # x_adv = cwl2.generate(x_input,**cwl2_params)
        # with tf.Session() as sess:
        #   cwl2 = CarliniWagnerL2(sess, model, batch_size=1, confidence=0, targeted=True, learning_rate=5e-3, binary_search_steps=5, max_iterations=1000, abort_early=True, initial_const=1e-2, clip_min=0, clip_max=1, num_labels=3, shape=x_input.get_shape().as_list()[1:])
        #   # x_adv = cwl2.
        #   def cw_wrap(x_val, y_val):
        #       return np.array(cwl2.attack(x_val, y_val), dtype=np.float32)
        #   x_adv = tf.py_func(cw_wrap, [x, labels], tf.float32)
        # (self, sess, model, batch_size, confidence,
        #              targeted, learning_rate,
        #              binary_search_steps, max_iterations,
        #              abort_early, initial_const,
        #              clip_min, clip_max, num_labels, shape)

        # attack = CWL2(self.sess, self.model, self.batch_size,
        #               self.confidence, 'y_target' in kwargs,
        #               self.learning_rate, self.binary_search_steps,
        #               self.max_iterations, self.abort_early,
        #               self.initial_const, self.clip_min, self.clip_max,
        #               nb_classes, x_input.get_shape().as_list()[1:])

        # (self, model, back='tf', sess=None)

        print('this line #4')

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        print('this line #5')

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            # with tf.Session() as sess:

            # cwl2 = CarliniWagnerL2(model, back='tf', sess=sess)
            # cwl2_params = {'clip_min':-1.0, 'clip_max':1.0}
            # x_adv = cwl2.generate(x_input,**cwl2_params)

            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                print('this line #6')
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                print('this line #7')
                save_images(adv_images, filenames, FLAGS.output_dir)
                print('this line #8')
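One scale detail in the parameters above: Inception inputs live in [-1, 1], so the dynamic range is 2.0 and `theta=1.` moves a selected pixel by half of that range per update (before clipping). A quick sanity check:

clip_min, clip_max = -1.0, 1.0
theta = 1.0
print(theta / (clip_max - clip_min))  # 0.5: each JSMA update shifts a pixel by 50% of the range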
Example #12
def adv_generate(nb_epochs=25,
                 batch_size=128,
                 learning_rate=0.001,
                 clean_train=True,
                 testing=False,
                 nb_filters=64,
                 num_threads=None,
                 data='cifar',
                 adv_attack='FGSM',  # matches the string comparisons below
                 save_dir='data'):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    # set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    config = tf.ConfigProto(**config_args)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    if data == "mnist":
        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                      train_end=60000,
                                                      test_start=0,
                                                      test_end=10000)
    else:
        X_train, Y_train, X_test, Y_test = data_cifar10()

    # print (Y_test.shape)
    '''
    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        imsave(os.path.join("benign", str(i) + ".jpg"), img)

    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        benign_path = "benign_" + str(np.argmax(Y_test[i,:], axis=0))
        if not os.path.exists(benign_path):
        	os.makedirs(benign_path)
        imsave(os.path.join(benign_path, str(i) + ".jpg"), img)
    '''
    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    if data == 'mnist':
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2018, 7, 18])

    if clean_train:
        if data == 'mnist':
            model = build_model(0.01, 1e-6)
        else:
            model = build_model_cifar(0.01, 1e-6)

        preds = model(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == 10000, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        if adv_attack == "FGSM":
            # Initialize the attack object and graph
            # FGSM
            print "FGSM ATTACK..."
            fgsm_params = {'eps': 0.1, 'clip_min': 0., 'clip_max': 1.}
            fgsm = FastGradientMethod(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            preds_adv = model(adv_x)
        elif adv_attack == "CWL2":
            # CWL2
            print "CWL2 ATTACK..."
            cwl2_params = {'batch_size': 8}
            cwl2 = CarliniWagnerL2(model, sess=sess)
            adv_x = cwl2.generate(x, **cwl2_params)
            preds_adv = model(adv_x)
        elif adv_attack == "JSMA":
            # JSMA
            print "JSMA ATTACK..."
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.1,
                'clip_min': 0.,
                'clip_max': 1.
            }
            adv_x = jsma.generate(x, **jsma_params)
            preds_adv = model(adv_x)
        elif adv_attack == "DeepFool":
            # DeepFool
            print "DeepFool ATTACK..."
            deepfool = DeepFool(model, sess=sess)
            deepfool_params = {
                'nb_candidate': 10,
                'overshoot': 0.02,
                'max_iter': 50,
                'clip_min': 0.0,
                'clip_max': 1.0
            }
            adv_x = deepfool.generate(x, **deepfool_params)
            preds_adv = model(adv_x)
        elif adv_attack == "LBFGS":
            # LBFGS
            print "LBFGS ATTACK..."
            lbfgs_params = {'y_target': y, 'batch_size': 100}
            lbfgs = LBFGS(model, sess=sess)
            adv_x = lbfgs.generate(x, **lbfgs_params)
            preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        adv_imgs = []
        adv_imgs_test = []

        if not adv_attack == "LBFGS":
            for i in range(5000):
                adv_imgs_train, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_train[i * 10:(i + 1) * 10]})
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)
            for i in range(1000):
                adv_imgs_tmp, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_test[i * 10:(i + 1) * 10]})
                adv_imgs_test.append(adv_imgs_tmp)
            adv_imgs_test = np.vstack(adv_imgs_test)
        else:
            for i in range(500):
                target = np_utils.to_categorical(
                    (np.argmax(Y_train[i * 100:(i + 1) * 100], axis=1) + 1) %
                    10, 10)
                adv_imgs_train, _ = sess.run([adv_x, preds_adv],
                                             feed_dict={
                                                 x: X_train[i * 100:(i + 1) *
                                                            100],
                                                 y: target
                                             })
                print('train image: %s' % str(i))
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)

            for i in range(100):
                target = np_utils.to_categorical(
                    (np.argmax(Y_test[i * 100:(i + 1) * 100], axis=1) + 1) %
                    10, 10)
                adv_imgs_tmp, _ = sess.run([adv_x, preds_adv],
                                           feed_dict={
                                               x: X_test[i * 100:(i + 1) *
                                                         100],
                                               y: target
                                           })
                adv_imgs_test.append(adv_imgs_tmp)
                print('test image: %s' % str(i))
            adv_imgs_test = np.vstack(adv_imgs_test)
        '''
        for i in range(6):
            target = np_utils.to_categorical((np.argmax(Y_train[i*10000: (i+1)*10000, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train, adv_labels_train = sess.run([adv_x, preds_adv], feed_dict={x: X_train[i*10000: (i+1)*10000,...],
                                                                                       y: target})
        for i in range(60000):
            target = np_utils.to_categorical((np.argmax(Y_train[i:i+1, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train = sess.run([adv_x], feed_dict={x: X_train[i:i+1,...], y: target})
            print (len(adv_imgs_train), adv_imgs_train[0].shape, adv_imgs_train[1])
        '''
        label_truth_train = np.argmax(Y_train, axis=1)
        label_truth_test = np.argmax(Y_test, axis=1)

        save_dir = os.path.join(save_dir, adv_attack)  # , "eps_" + str(eps)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        print(adv_imgs.shape, adv_imgs_test.shape)
        provider.save_h5(adv_imgs, label_truth_train,
                         os.path.join(save_dir, "train_adv.h5"))
        provider.save_h5(adv_imgs_test, label_truth_test,
                         os.path.join(save_dir, "test_adv.h5"))
        # utils.save_h5(X_train, label_truth_train, "FGSM/train_benign.h5")
        # utils.save_h5(X_test, label_truth_test, "FGSM/test_benign.h5")
        '''
        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
            imsave(os.path.join("adv", str(i) + ".jpg"), img)

        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
	    adv_path = "adv_" + str(np.argmax(adv_labels[i,:], axis=0))
	    if not os.path.exists(adv_path):
	        os.makedirs(adv_path)
	    imsave(os.path.join(adv_path, str(i) + ".jpg"), img)
        '''

        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

    return report
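`provider.save_h5` above is project-specific; a minimal stand-in with h5py might look like the sketch below (the dataset names 'data' and 'label' are assumptions, not taken from the project):

import h5py
import numpy as np

def save_h5(images, labels, path):
    # Store the adversarial images and their ground-truth labels in one file.
    with h5py.File(path, 'w') as f:
        f.create_dataset('data', data=np.asarray(images))
        f.create_dataset('label', data=np.asarray(labels))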
Example #13
gtsrb = GtsrbProvider()
# gtsrb.dump_images()

cnn = CNNModel(
    image_size=gtsrb.IMAGE_SIZE,
    classes=gtsrb.CLASSES,
    model_name='gtsrb-64x64',
    model_dir='tmp/gtsrb_model-64x64',
    conv_layers=[32, 64, 128],
    fc_layer=512,
)

x, y = cnn.make_inputs()
probs = cnn.make_model(x)
cnn.start_session()

# jsma_params was not defined in this snippet; the values below mirror the
# other examples in this collection.
jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.}

jsma = SaliencyMapMethod(cnn, sess=cnn.sess)
adv_x = jsma.generate(x, **jsma_params)
probs = cnn.make_model(adv_x)

cnn.adv_test(probs, x, y, adv_x, gtsrb.test_data(size=10))
# cnn.test(gtsrb)
cnn.end_session()

#cnn.test(2000, gtsrb)

# for i in range(100):
#     data, label = gtsrb.next_batch('test')
#     print(data, label)
Example #14
def JSMA_FGSM_BIM(train_start=0,
                  train_end=60000,
                  test_start=0,
                  test_end=10000,
                  nb_epochs=6,
                  batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  backprop_through_attack=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on ElasticNet (EN) adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
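        # VAT hyper-parameters: 'eps' is the perturbation size, 'xi' the
        # finite-difference scale and 'num_iterations' the number of power
        # iterations used to approximate the most sensitive direction.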
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    # Build the attack graphs against the new model (model_2) so that its own
    # gradients drive the adversarial training and evaluation below.
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model_2, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model_2, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model_2, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
Example #15
0
def _JSMA(self):
    jsma_attack = SaliencyMapMethod(self.wrapped_model, sess=self.sess)
    params = {'clip_min': 0., 'clip_max': 1.}
    adv_x = jsma_attack.generate(self.x, **params)
    self.save_images(adv_x, self.save_loc)
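# The _JSMA helper above assumes its host object already exposes
# wrapped_model, sess, x, save_loc and a save_images helper. A minimal,
# hypothetical host skeleton (illustrative names only, not from the
# original source) could look like:
class JSMAAttackHost:
    def __init__(self, wrapped_model, sess, x, save_loc):
        self.wrapped_model = wrapped_model  # e.g. a cleverhans-wrapped model
        self.sess = sess                    # active tf.Session
        self.x = x                          # input placeholder tensor
        self.save_loc = save_loc            # output directory for images

    def save_images(self, adv_x, save_loc):
        # Left unimplemented here; the original class is expected to
        # evaluate adv_x and write the resulting images to save_loc.
        raise NotImplementedError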
Example #16
0
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_filters: number of convolutional filters per layer
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn(nb_filters=nb_filters)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # Train without per-batch progress logging
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on BIM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on EN adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on VAT adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    # Initialize variables before crafting examples with generate_np below.
    # Note that tf.global_variables_initializer() re-initializes *all*
    # variables, including the first model's trained weights.
    sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
         #'y': None,
         'max_iterations': 100,
         'learning_rate': 0.1,
         'batch_size': batch_size,
         'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate':10,
                       'overshoot':0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. Virtual Adversarial Method (VAT)
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # ==> generate 10 targeted classes for every train data regardless
    # This call runs the Jacobian-based saliency map approach
    # Loop over the samples we want to perturb into adversarial examples

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # add normal sample in!!!!
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)
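            # generate_np runs the attack eagerly and returns a NumPy array,
            # unlike generate(), which only builds a symbolic graph.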

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # Save the crafted data so this file does not need to be re-run later
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # >>> data = np.load('jsma_training_data.npz')
    # >>> data['x_train'], data['y_train']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the model against clean data and 7 attacks:
    # jsma, fgsm, base iterative, elastic net, deepfool, C&W and VAT
    def evaluate_against_all():
        # 1 Clean Data
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params)
        print('Legitimate accuracy: %0.4f' % accuracy)
        tmp = 'Legitimate accuracy: ' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 2 JSMA
        accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                              Y_test, args=eval_params)
        print('JSMA accuracy: %0.4f' % accuracy)
        tmp = 'JSMA accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 3 FGSM
        accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                              Y_test, args=eval_params)
        print('FGSM accuracy: %0.4f' % accuracy)
        tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 4 Base Iterative
        accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                              Y_test, args=eval_params)
        print('Base Iterative accuracy: %0.4f' % accuracy)
        tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 5 Elastic Net
        accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                              Y_test, args=eval_params)
        print('Elastic Net accuracy: %0.4f' % accuracy)
        tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 6 DeepFool
        accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                              Y_test, args=eval_params)
        print('DeepFool accuracy: %0.4f' % accuracy)
        tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 7 C & W Attack
        accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                              Y_test, args=eval_params)
        print('C & W accuracy: %0.4f' % accuracy)
        tmp = 'C & W accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 8 Virtual Adversarial
        accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                              Y_test, args=eval_params)
        print('VAT accuracy: %0.4f' % accuracy)
        tmp = 'VAT accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        f_out.write("*******End of Epoch***********\n\n")
        print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
Example #17
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=True,
                   label_smoothing=0.1):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    tf.keras.backend.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError(
            "this tutorial requires keras to be configured to channels_last format"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    sal = SaliencyMapMethod(wrap, sess=sess)
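    # generate() is called without attack parameters, so SaliencyMapMethod
    # falls back on its defaults (untargeted, theta=1.0, gamma=1.0 in the
    # CleverHans version assumed here).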
    adv_x = sal.generate(x)

    batch = 1000
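    # Craft adversarial examples in fixed-size batches; any remainder smaller
    # than 'batch' is dropped by the integer division below.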
    x_adv_test = None
    for i in tqdm(range(int(len(x_test) / batch))):
        tmp = sess.run(adv_x, feed_dict={x: x_test[i * batch:(i + 1) * batch]})
        if x_adv_test is None:
            x_adv_test = tmp
        else:
            x_adv_test = np.concatenate((x_adv_test, tmp))

    x_adv_train = None
    for i in tqdm(range(int(len(x_train) / batch))):
        tmp = sess.run(adv_x,
                       feed_dict={x: x_train[i * batch:(i + 1) * batch]})
        if x_adv_train is None:
            x_adv_train = tmp
        else:
            x_adv_train = np.concatenate((x_adv_train, tmp))

    def evaluate_adv():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_adv_test,
                         y_test,
                         args=eval_params)
        report.clean_train_adv_eval = acc
        print('Test accuracy on adversarial examples: %0.4f' % acc)

    evaluate_adv()

    save_list = [x_adv_train, x_adv_test]
    print(x_adv_train.shape)
    print(x_adv_test.shape)
    pickle.dump(save_list, open("./sal.pkl", 'wb'))