Esempio n. 1
0
# Build a dataset of example indices 0..dataset_size-1, shuffled through a
# 4096-element buffer and repeated without end.
dataset_size = X_train.shape[0]
dataset_train = tf.data.Dataset.range(dataset_size).shuffle(4096).repeat()


def lookup(p):
    """Return the pair ``(X_train[p], y_train[p])`` for index ``p``."""
    features = X_train[p]
    target = y_train[p]
    return features, target


# Turn every index into its (X_train, y_train) entry through `lookup`;
# both outputs are declared as float32 tensors.
dataset_train = dataset_train.map(
    lambda idx: tf.py_func(lookup, [idx], [tf.float32, tf.float32]))

# Augment the first element of each pair (horizontal flip, then shift) with
# 4 parallel calls, then batch and keep 16 batches prefetched.
dataset_train = (
    dataset_train
    .map(lambda image, label:
         (random_shift(random_horizontal_flip(image)), label), 4)
    .batch(FLAGS.batch_size)
    .prefetch(16))

# Read the image dimensions and the label width off the validation arrays
img_rows, img_cols, nchannels = X_val.shape[1:4]
nb_classes = y_val.shape[1]

# Input placeholders: a batch of images and the matching label vectors
x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
y = tf.placeholder(tf.float32, shape=(None, nb_classes))

# Train a model
train_params = {
    'nb_epochs': FLAGS.nb_epochs,
    'batch_size': FLAGS.batch_size,
Esempio n. 2
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1,
                     adversarial_training=ADVERSARIAL_TRAINING):
    """
    CIFAR10 cleverhans tutorial using the Basic Iterative Method (BIM).

    When ``clean_train`` is set, a first model is trained on clean examples
    and then evaluated against BIM for 20 increasing values of ``eps_iter``.
    When ``adversarial_training`` is set, a second model is trained with the
    BIM attack folded into its loss and evaluated on clean and adversarial
    test inputs.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count passed to ModelAllConvolutional
    :param num_threads: if truthy, the session is created with
                        intra_op_parallelism_threads=1; otherwise soft
                        device placement is enabled instead
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param adversarial_training: True means using adversarial training
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        # NOTE(review): the thread count is pinned to 1 whenever num_threads
        # is truthy; the value of num_threads itself is not used.
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        # Let TF fall back to another device when an op cannot be placed
        config_args = dict(allow_soft_placement=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    # Augment each training image (horizontal flip, then shift), 4 in parallel
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Parameters for training and evaluating the CIFAR10 models
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    # Baseline BIM configuration. It is shared by the clean-model sweep and
    # by adversarial training, so it must never be modified in place.
    bim_params = {
        'eps': 0.5,
        'clip_min': 0.,
        'eps_iter': 0.002,
        'nb_iter': 10,
        'clip_max': 1.,
        'ord': np.inf
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """
        Evaluate ``preds`` on (x_set, y_set) and store the accuracy on the
        report under ``report_key``. The accuracy is printed only when
        ``is_adv`` is True ('adversarial') or False ('legitimate').
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        report_text = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelAllConvolutional('model1',
                                      nb_classes,
                                      nb_filters,
                                      input_shape=[32, 32, 3])

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        # `evaluate` is handed to train() so that evaluation can happen
        # while training is in progress.
        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Basic Iterative Method (BIM) attack object once and
        # sweep the per-iteration step size over 20 values. The sweep works
        # on a private copy of the parameters so that the adversarial
        # training below still sees the baseline `eps_iter`, whether or not
        # this branch ran.
        bim = BasicIterativeMethod(model, sess=sess)
        sweep_params = dict(bim_params)
        eps_iter_increment = 0.002
        for step in range(20):
            # Derive the step size from the loop index rather than by
            # repeated addition, which avoids accumulating float error.
            sweep_params['eps_iter'] = (bim_params['eps_iter'] +
                                        step * eps_iter_increment)
            adv_x = bim.generate(x, **sweep_params)
            preds_adv = model.get_logits(adv_x)
            # Evaluate the accuracy of the CIFAR10 model on adversarial
            # examples; the report key is overwritten on every step.
            print("eps:%0.2f" %
                  (sweep_params["eps_iter"] * sweep_params['nb_iter']))
            do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error (uses the last step size of the sweep)
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    if not adversarial_training:
        return report

    print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to BasicIterativeMethod
    model2 = ModelAllConvolutional('model2',
                                   nb_classes,
                                   nb_filters,
                                   input_shape=[32, 32, 3])
    bim2 = BasicIterativeMethod(model2, sess=sess)

    def attack(x):
        return bim2.generate(x, **bim_params)

    # add attack to loss
    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # Treat the adversarial examples as constants so that no gradient
        # flows back through the BIM construction. Enabling the flag
        # increases the cost of training, but gives the defender the
        # ability to anticipate how the attacker will change their strategy
        # in response to updates to the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          None,
          None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    rng = np.random.RandomState()

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    nb_latent_size = 100
    # Get MNIST test data
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))
    z = tf.placeholder(tf.float32, shape=(None, nb_latent_size))
    z_t = tf.placeholder(tf.float32, shape=(None, nb_latent_size))

    #nb_filters = 64
    nb_layers = 500
    '''
  def do_eval_cls(preds, x_set, y_set, x_tar_set,report_key, is_adv = None):
    acc = model_eval(sess, x, y, preds, x_t, x_set, y_set, x_tar_set, args=eval_params_cls)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  def eval_cls():
    do_eval_cls(y_logits, x_test, y_test, x_test,'clean_train_clean_eval', False)
  '''
    '''
  def evaluate():
        do_eval(y_logits, x_test, y_test, 'clean_train_clean_eval', False)

  filepath_ae = "clean_model_cifar10_ae.joblib"
  filepath_cl = "classifier_cifar10.joblib"

  
# Define TF model graph
  model = ModelBasicAE('model', nb_layers, nb_latent_size)
  #cl_model = ModelCls('cl_model')
  #cl_model = ModelAllConvolutional('model1', nb_classes, nb_filters,
  #                                input_shape=[32, 32, 3])
  #preds = model.get_logits(x)
  recons = model.get_layer(x, 'RECON')
  latent1_orig = model.get_layer(x, 'LATENT')
  latent1_orig_recon = model.get_layer(recons, 'LATENT')

  loss = SquaredError(model)
  print("Defined TensorFlow model graph.")
  #y_logits = cl_model.get_logits(x)
  #loss_cls = CrossEntropy(cl_model, smoothing=label_smoothing)
  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }
  
  
  train_params_cls = {
      'nb_epochs': 4,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  
  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  #if os.path.exists(model_path + ".meta"):
   # tf_model_load(sess, model_path)
  #else:
  #eval_params_cls = {'batch_size': batch_size}
  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  
  def do_eval(recons, x_orig, x_target, y_orig, y_target, report_key, is_adv=False, x_adv = None, recon_adv = False, lat_orig = None, lat_orig_recon = None):
    noise, d_orig, d_targ, avg_dd, d_latent = model_eval_ae(sess, x, x_t, recons, x_orig, x_target, x_adv, recon_adv, lat_orig, lat_orig_recon, args = eval_params)
    setattr(report, report_key, avg_dd)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test d1 on ', report_text,  ' examples: ', d_orig)
      print('Test d2 on ', report_text,' examples: ', d_targ)
      print('Test distance difference on %s examples: %0.4f' % (report_text, avg_dd))
      print('Noise added: ', noise)
      print("dist_latent_orig_recon on ", report_text, "examples : ", d_latent)
      print()

  def evaluate_ae():
    do_eval(recons, x_test, x_test, y_test, y_test, 'clean_train_clean_eval', False, None, None, latent1_orig, latent1_orig_recon)

  print("Training autoencoder")
  train_ae(sess, loss, x_train,x_train, evaluate = evaluate_ae, args=train_params, rng=rng, var_list=model.get_params())
  #with sess.as_default():
   # save(filepath_ae, model)
  '''
    save_dir = 'models'
    model_name = 'cifar10_AE'
    model_path_ae = os.path.join(save_dir, model_name)

    if clean_train_ae == True:
        input_img = Input(shape=(32, 32, 3))
        x = Conv2D(64, (3, 3), padding='same')(input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(16, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        encoded = MaxPooling2D((2, 2), padding='same')(x)

        x = Conv2D(16, (3, 3), padding='same')(encoded)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(64, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(3, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        decoded = Activation('sigmoid')(x)

        model = Model(input_img, decoded)
        model.compile(optimizer='adam', loss='binary_crossentropy')
        #es_cb = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto')
        #chkpt = saveDir + 'AutoEncoder_Cifar10_Deep_weights.{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5'
        #cp_cb = ModelCheckpoint(filepath = chkpt, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
        model.fit(
            x_train,
            x_train,
            batch_size=128,
            epochs=2,
            verbose=1,
            validation_data=(x_test, x_test),
            #callbacks=[es_cb, cp_cb],
            shuffle=True)
        score = model.evaluate(x_test, x_test, verbose=1)
        print(score)
        model.save(model_path_ae)
        print('Saved trained model at %s ' % model_path)

    else:
        model = load_model(model_path_ae)

    num_classes = 10
    save_dir = 'models'
    model_name = 'cifar10_CNN'
    model_path_cls = os.path.join(save_dir, model_name)

    if clean_train_cl == True:
        print("Training CNN classifier")
        cl_model = Sequential()
        cl_model.add(
            Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
        cl_model.add(Activation('relu'))
        cl_model.add(Conv2D(32, (3, 3)))
        cl_model.add(Activation('relu'))
        cl_model.add(MaxPooling2D(pool_size=(2, 2)))
        cl_model.add(Dropout(0.25))

        cl_model.add(Conv2D(64, (3, 3), padding='same'))
        cl_model.add(Activation('relu'))
        cl_model.add(Conv2D(64, (3, 3)))
        cl_model.add(Activation('relu'))
        cl_model.add(MaxPooling2D(pool_size=(2, 2)))
        cl_model.add(Dropout(0.25))

        cl_model.add(Flatten())
        cl_model.add(Dense(512))
        cl_model.add(Activation('relu'))
        cl_model.add(Dropout(0.5))
        cl_model.add(Dense(num_classes))
        cl_model.add(Activation('softmax'))

        opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

        # Let's train the model using RMSprop
        cl_model.compile(loss='categorical_crossentropy',
                         optimizer=opt,
                         metrics=['accuracy'])

        cl_model.fit(x_train,
                     y_train,
                     batch_size=90,
                     epochs=4,
                     validation_data=(x_test, y_test),
                     shuffle=True)

        cl_model.save(model_path_cls)
        print('Saved trained model at %s ' % model_path)

    else:
        cl_model = load_model(model_path_cls)

        # Score trained model.
    scores = cl_model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    '''
  train(sess, loss_cls, None, None,
            dataset_train=dataset_train, dataset_size=dataset_size,
            evaluate=eval_cls, args=train_params_cls, rng=rng,
            var_list=cl_model.get_params())
  '''
    #with sess.as_default():
    # save(filepath_cl, cl_model)
    '''
  else:
    

    model = load(filepath_ae)
    cl_model = load(filepath_cl)
  '''

    #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params())
    #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params())

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(model, cl_model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * (nb_classes - 1)
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)

            #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])

            adv_input_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes - 1):
                    targ.append(y_test[idxs[curr_num]])
                adv_input_y.append(targ)

            adv_input_y = np.array(adv_input_y)

            adv_target_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(y_test[idxs[id]])
                adv_target_y.append(targ)

            adv_target_y = np.array(adv_target_y)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

            adv_input_targets = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_test[idxs[id]])
                adv_input_targets.append(targ)
            adv_input_targets = np.array(adv_input_targets)

            adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1),
                                             img_rows, img_cols, nchannels))
            adv_input_targets = adv_input_targets.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_y = adv_input_y.reshape(
                source_samples * (nb_classes - 1), 10)
            adv_target_y = adv_target_y.reshape(
                source_samples * (nb_classes - 1), 10)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))
    yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)

    cw_params = {
        'binary_search_steps': 4,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    adv = sess.run(adv)
    #print("shaep of adv: ", np.shape(adv))
    '''
  recons = model.get_layer(x, 'RECON')
  recon_orig = model.get_layer(adv_inputs, 'RECON')
  recon_adv = model.get_layer(adv, 'RECON')
  lat_orig = model.get_layer(x, 'LATENT')
  lat_orig_recon = model.get_layer(recons, 'LATENT')
  #pred_adv_recon = cl_model.get_logits(recon_adv)
  '''
    recon_orig = model.predict(adv_inputs)
    recon_adv = model.predict(adv)
    #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    #eval_params = {'batch_size': 90}

    #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
    #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)

    #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv, recon_adv,lat_orig, lat_orig_recon, args=eval_params)
    shape = np.shape(adv_inputs)
    noise = reduce_sum(np.square(adv_inputs - adv), list(range(1, len(shape))))
    print("noise: ", noise)
    #recon_adv = sess.run(recon_adv)
    #recon_orig = sess.run(recon_orig)
    scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
    print("classifier acc_target: ", scores2[1])
    print("classifier acc_true: ", scores1[1])

    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        #rint(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    #sess.close()

    # Finally, block & display a grid of all the adversarial examples

    if viz_enabled:
        #_ = grid_visual(grid_viz_data)
        #_ = grid_visual(grid_viz_data_1)

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig1')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig2')

    #return report

    #adversarial training
    if (adv_train == True):

        print("starting adversarial training")
        #sess1 = tf.Session()
        adv_input_set = []
        adv_input_target_set = []

        for i in range(20):

            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array([[instance] * (nb_classes - 1)
                                     for instance in x_train[idxs]],
                                    dtype=np.float32)
            adv_input_targets_2 = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_train[idxs[id]])
                adv_input_targets_2.append(targ)
            adv_input_targets_2 = np.array(adv_input_targets_2)

            adv_inputs_2 = adv_inputs_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))
            adv_input_targets_2 = adv_input_targets_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        adv_input_set = np.array(adv_input_set),
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        adv_input_set = np.reshape(
            adv_input_set,
            (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] *
             np.shape(adv_input_set)[2], np.shape(adv_input_set)[3],
             np.shape(adv_input_set)[4], np.shape(adv_input_set)[5]))
        adv_input_target_set = np.reshape(adv_input_target_set,
                                          (np.shape(adv_input_target_set)[0] *
                                           np.shape(adv_input_target_set)[1],
                                           np.shape(adv_input_target_set)[2],
                                           np.shape(adv_input_target_set)[3],
                                           np.shape(adv_input_target_set)[4]))

        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_name = 'cifar10_AE_adv'
        model_path_ae = os.path.join(save_dir, model_name)

        input_img = Input(shape=(32, 32, 3))
        x = Conv2D(64, (3, 3), padding='same')(input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(16, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        encoded = MaxPooling2D((2, 2), padding='same')(x)

        x = Conv2D(16, (3, 3), padding='same')(encoded)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(64, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(3, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        decoded = Activation('sigmoid')(x)

        model2 = Model(input_img, decoded)
        model2.compile(optimizer='adam', loss='binary_crossentropy')

        model2.fit(x_train_app,
                   x_train_aim,
                   batch_size=128,
                   epochs=20,
                   verbose=1,
                   validation_data=(x_test, x_test),
                   callbacks=[es_cb, cp_cb],
                   shuffle=True)
        score = model.evaluate(x_test, x_test, verbose=1)
        print(score)
        model2.save(model_path_ae_adv)
        print('Saved adv trained model at %s ' % model_path)
        '''
    model_adv_trained = ModelBasicAE('model_adv_trained', nb_layers, nb_latent_size)
    recons_2 = model_adv_trained.get_layer(x, 'RECON')
    loss_2 = SquaredError(model_adv_trained) 
    train_ae(sess, loss_2, x_train_app, x_train_aim ,args=train_params, rng=rng, var_list=model_adv_trained.get_params())
    saver = tf.train.Saver()
    saver.save(sess, model_path)
    '''

        cw2 = CarliniWagnerAE(model_adv_trained, cl_model, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        recon_adv = model2.predict(adv)
        recon_orig = model2.predict(adv_inputs)
        #print("shaep of adv: ", np.shape(adv))
        '''
    recon_orig = model_adv_trained.get_layer(adv_inputs, 'RECON')
    recon_adv = model_adv_trained.get_layer(adv_2, 'RECON')
    lat_orig = model_adv_trained.get_layer(x, 'LATENT')
    lat_orig_recon = model_adv_trained.get_layer(recons, 'LATENT')
    '''
        #pred_adv_recon = cl_model.get_logits(recon_adv)

        #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        #eval_params = {'batch_size': 90}
        if targeted:
            #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv_2, recon_adv,lat_orig, lat_orig_recon, args=eval_params)
            #acc = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
            noise = reduce_sum(tf.square(adv_inputs - adv_2),
                               list(range(1, len(shape))))
            print("noise: ", noise)
            #print("d1: ", d1)
            #print("d2: ", d2)
            #print("d1-d2: ", dist_diff)
            #print("Avg_dist_lat: ", avg_dist_lat)
            #print("classifier acc: ", acc)
        '''  
    recon_adv = sess.run(recon_adv)
    recon_orig = sess.run(recon_orig)
    '''
        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.eval_params(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])

        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i,
                                            j] = adv_2[i * (nb_classes - 1) +
                                                       j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i, j] = adv_2[i *
                                                          (nb_classes - 1) + j]

            #rint(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully found

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print(
            'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

        # Close TF session
        sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if viz_enabled:
            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')

            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')

                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig1_adv_trained')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig2_adv_trained')

            return report


#binarization defense
    if (binarization_defense == True or mean_filtering == True):

        #adv = sess.run(adv)
        # print(adv[0])
        if (binarization_defense == True):
            adv[adv > 0.5] = 1.0
            adv[adv <= 0.5] = 0.0
        else:
            #radius = 2
            #adv_list = [mean(adv[i,:,:,0], disk(radius)) for i in range(0, np.shape(adv)[0])]
            #adv = np.array(adv_list)
            #adv = np.expand_dims(adv, axis = 3)
            adv = uniform_filter(adv, 2)
            #adv = median_filter(adv, 2)
        #print("after bin ")
        #print(adv[0])
        '''
    recons = model.get_layer(x, 'RECON')
    recon_orig = model.get_layer(adv_inputs, 'RECON')
    recon_adv = model.get_layer(adv, 'RECON')
    lat_orig = model.get_layer(x, 'LATENT')
    lat_orig_recon = model.get_layer(recon_orig, 'LATENT')
    '''
        recon_orig = model.predict(adv_inputs)
        recon_adv = model.predict(adv)

        #pred_adv_recon = cl_model.get_logits(recon_adv)

        #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        eval_params = {'batch_size': 90}
        if targeted:
            #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv, recon_adv,lat_orig, lat_orig_recon, args=eval_params)
            #acc1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
            #acc2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)

            #print("d1: ", d1)
            #print("d2: ", d2)
            noise = reduce_sum(tf.square(x_orig - x_adv),
                               list(range(1, len(shape))))
            print("noise: ", noise)
            #print("classifier acc for target class: ", acc1)
            #print("classifier acc for true class: ", acc2)
        '''
    recon_adv = sess.run(recon_adv)
    recon_orig = sess.run(recon_orig)
    '''
        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.evalluate(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])
        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]
            sess.close()

            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig1_bin')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig2_bin')
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL,
                     ckpt_dir='None',
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial.

    Trains (or restores from a checkpoint) a classifier, optionally wraps it
    in a ``tf_robustify`` predictor, and evaluates it on clean and on
    adversarial test inputs. Many options are read from the module-level
    ``FLAGS`` object rather than from the arguments (attack type, epsilons,
    pre-computed PGD samples, debug dumps, ...).

    NOTE: an unconditional ``exit(0)`` follows the clean-training section, so
    the adversarial-training section at the end is currently unreachable and
    the function raises ``SystemExit`` instead of returning.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param architecture: 'ConvNet' (ModelAllConvolutional) or 'ResNet';
                         any other value raises an Exception
    :param load_model: if true, restore the weights from a checkpoint
                       instead of training
    :param ckpt_dir: directory holding the checkpoint sub-directories; the
                     string 'None' selects './models/'
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: number of filters passed to ModelAllConvolutional
    :param num_threads: if truthy, the session is created with
                        intra_op_parallelism_threads=1 (the value itself is
                        not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Seed TF and NumPy from the wall clock, i.e. every run uses a different
    # seed (runs are deliberately NOT reproducible).
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    # NOTE(review): num_threads only toggles single-threaded intra-op
    # execution; its numeric value is ignored -- confirm this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')

    # Optionally replace the training set by previously dumped clean / PGD
    # samples (see the FLAGS.save_pgd_samples branch further down).
    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        # Channels-first dumps are converted to channels-last.
        if x_train.shape[1] == 3:
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
        # Integer class labels are expanded to one-hot rows.
        if len(y_train.shape) == 1:
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    # Shuffle the training set; keep the PGD samples aligned with it.
    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]
    # Keep only the first FLAGS.test_size test examples, in shuffled order.
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack parameters
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        # ord: ,
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  #1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }

    # Madry et al. do not divide pixel values by 255, so scale everything
    # back up (presumably from [0, 1] to [0, 255] -- the printed amin/amax
    # below let you confirm). The scaling is done in place.
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train), np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))

    print(
        'clip_min : {}, clip_max : {}  >> CHECK WITH WHICH VALUES THE CLASSIFIER WAS PRETRAINED !!! <<'
        .format(pgd_params['clip_min'], pgd_params['clip_max']))

    rng = np.random.RandomState()  # [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                predictor=None,
                x_adv=None):
        """
        Compute, print and log the accuracy of the model on (x_set, y_set).

        :param preds: logits tensor evaluated through model_eval
        :param x_set: numpy array of inputs
        :param y_set: numpy array of one-hot labels
        :param report_key: currently unused (nothing is written to `report`)
        :param is_adv: None (silent), True ('adversarial') or False
                       ('legitimate'); selects the report text / log labels
        :param predictor: optional primed generator; each batch is passed to
                          it with send() and it yields one row per example
                          with two columns (predicted class, detection flag)
        :param x_adv: optional tensor; when given together with a predictor,
                      x_set is first mapped through it with batch_eval
        :return: accuracy (of the predictor if one is given, else of preds)
        """
        if predictor is None:
            acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        else:
            # First report the plain ("uncorrected") accuracy of preds.
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
            # Transposing the (N, 2) result splits it into the two columns.
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()
            # if is_adv:
            # import IPython ; IPython.embed() ; exit(1)
            if FLAGS.save_debug_dict:
                debug_dict['x_set'] = x_set
                debug_dict['y_set'] = y_set
                ddfn = 'logs/debug_dict_{}.pkl'.format(
                    'adv' if is_adv else 'clean')
                # Never overwrite an existing dump.
                if not os.path.exists(ddfn):
                    with open(ddfn, 'wb') as f:
                        pickle.dump(debug_dict, f)
                debug_dict.clear()
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
            if is_adv is not None:
                # NOTE(review): `swriter` is not defined in this function;
                # presumably a module-level summary writer -- confirm.
                label = 'test_acc_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(label, acc)
                if predictor is not None:
                    # Fraction of examples whose detection flag matches
                    # is_adv.
                    detect = np.equal(p_det, is_adv).mean()
                    label = 'test_det_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    print(label, detect)
                    swriter.add_scalar(label, detect)
                    # Accuracy restricted to the examples whose detection
                    # flag matches is_adv.
                    label = 'test_dac_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    swriter.add_scalar(
                        label,
                        np.equal(p_set,
                                 y_set[:len(p_set)].argmax(-1))[np.equal(
                                     p_det, is_adv)].mean())

        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1',
                                          nb_classes,
                                          nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            # Compare by value: `is not 'None'` only works for interned
            # strings and misfires for values parsed at runtime.
            if ckpt_dir != 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

            # Map checkpoint names to graph variables by stripping the
            # leading scope component and the ':<index>' suffix.
            saver = tf.train.Saver(var_list=dict(
                (v.name.split('/', 1)[1].split(':')[0], v)
                for v in tf.global_variables()))
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

            initialize_uninitialized_global_variables(sess)

        else:

            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

            train(sess,
                  loss,
                  None,
                  None,
                  dataset_train=dataset_train,
                  dataset_size=dataset_size,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params())

        # Walk back from the logits to the first MatMul op found by following
        # each op's first input; its two inputs are taken as the latent
        # features and the weight matrix.
        logits_op = preds.op
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect],
                    y_train[:FLAGS.n_collect],
                    x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:

                def _predictor():
                    # Pass-through predictor: plain argmax of the logits and
                    # an all-zero detection column.
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
            # Prime the generator so that send() can be used on it.
            next(predictor)
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds,
                                x_test,
                                y_test,
                                'clean_train_clean_eval',
                                False,
                                predictor=predictor)

        # Initialize the attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps

            def _attack_mean(x):
                # Attack FLAGS.mean_samples noisy copies of one image and
                # project each result back into the eps-box around it.
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            # Average over the noisy copies of each image.
            adv_x = tf.reduce_mean(adv_x, 1)
        else:
            # Fail with a clear message instead of a NameError on adv_x.
            raise ValueError('Unknown attack: {}'.format(FLAGS.attack))

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            # `labels` (not `y`) so that the label placeholder captured by
            # do_eval is not rebound.
            for ds, labels, name in ((x_train, y_train, 'train'),
                                     (x_test, y_test, 'test')):
                train_batches = math.ceil(len(ds) / FLAGS.batch_size)
                train_pgd = np.concatenate([
                    sess.run(adv_x, {
                        x:
                        ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size]
                    }) for b in tqdm.trange(train_batches)
                ])
                # Samples are stored divided by 255 (the load path above
                # multiplies by 255 again).
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), labels)
                train_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), train_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv,
                              x_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor,
                              x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds,
                              pgd_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')

    # NOTE: everything below this exit(0) is currently unreachable.
    exit(0)
    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2',
                                       nb_classes,
                                       nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        # Was assigned to `model`, leaving `model2` undefined on this path.
        model2 = ResNet()
    else:
        raise Exception('Specify valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir != 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

        assert ckpt_path and tf_model_load(
            sess, file_path=ckpt_path), '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

        initialize_uninitialized_global_variables(sess)

    else:

        def evaluate2():
            # Accuracy of adversarially trained model on legitimate test inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess,
              loss2,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate2,
              args=train_params,
              rng=rng,
              var_list=model2.get_params())

    # Evaluate model
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
def _build_conv_autoencoder():
    """Build and compile the convolutional autoencoder for 32x32x3 images.

    Three Conv-BatchNorm-ReLU-MaxPool stages (64, 32, 16 filters) encode the
    input; three Conv-BatchNorm-ReLU-UpSample stages (16, 32, 64 filters)
    followed by a 3-filter Conv-BatchNorm-sigmoid layer decode it.

    :return: a Keras ``Model`` compiled with the 'adam' optimizer and the
             'binary_crossentropy' loss
    """
    input_img = Input(shape=(32, 32, 3))

    net = input_img
    for filters in (64, 32, 16):
        net = Conv2D(filters, (3, 3), padding='same')(net)
        net = BatchNormalization()(net)
        net = Activation('relu')(net)
        net = MaxPooling2D((2, 2), padding='same')(net)

    for filters in (16, 32, 64):
        net = Conv2D(filters, (3, 3), padding='same')(net)
        net = BatchNormalization()(net)
        net = Activation('relu')(net)
        net = UpSampling2D((2, 2))(net)

    net = Conv2D(3, (3, 3), padding='same')(net)
    net = BatchNormalization()(net)
    decoded = Activation('sigmoid')(net)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder


def _build_targeted_pairs(images, labels, nb_classes):
    """Pair one example of every class with one example of each other class.

    The first example of each class (by argmax of its one-hot label) is
    selected. Every selected example is repeated ``nb_classes - 1`` times and
    matched, in increasing class order, with the selected examples of all the
    other classes.

    :param images: array of images indexed along axis 0
    :param labels: one-hot label array aligned with ``images``
    :param nb_classes: number of classes
    :return: ``(sources, targets, source_labels, target_labels)``; each has
             ``nb_classes * (nb_classes - 1)`` rows and ``sources`` is float32
    :raises IndexError: if a class has no example in ``labels``
    """
    class_ids = np.argmax(labels, axis=1)
    idxs = [np.where(class_ids == c)[0][0] for c in range(nb_classes)]
    class_images = images[idxs]
    class_labels = labels[idxs]

    per_source = nb_classes - 1
    sources = np.repeat(class_images, per_source, axis=0).astype(np.float32)
    source_labels = np.repeat(class_labels, per_source, axis=0)

    # For source class c, the targets are all classes != c in increasing order.
    other_classes = [other
                     for current in range(nb_classes)
                     for other in range(nb_classes)
                     if other != current]
    targets = class_images[other_classes]
    target_labels = class_labels[other_classes]
    return sources, targets, source_labels, target_labels


def _fill_viz_grids(grid_recon, grid_input, recon_orig, recon_adv,
                    clean_inputs, adv_examples, nb_classes):
    """Fill the two ``nb_classes x nb_classes`` visualization grids in place.

    Cell ``[i, i]`` receives the clean reconstruction / clean input of source
    class ``i``; cell ``[i, j]`` (``j != i``) receives the reconstruction /
    adversarial example for source ``i`` and target ``j``. The flat arrays
    hold ``nb_classes - 1`` consecutive rows per source, with the source's
    own class left out of the target ordering.

    :param grid_recon: grid receiving reconstructions (modified in place)
    :param grid_input: grid receiving network inputs (modified in place)
    :param recon_orig: reconstructions of ``clean_inputs``
    :param recon_adv: reconstructions of ``adv_examples``
    :param clean_inputs: clean source images, repeated per target
    :param adv_examples: adversarial examples aligned with ``clean_inputs``
    :param nb_classes: number of classes
    """
    per_source = nb_classes - 1
    for src in range(nb_classes):
        base = src * per_source
        for tgt in range(nb_classes):
            if tgt == src:
                grid_recon[src, tgt] = recon_orig[base]
                grid_input[src, tgt] = clean_inputs[base]
            else:
                # Targets above the source are shifted down by one because
                # the source's own class is not among its targets.
                offset = tgt - 1 if tgt > src else tgt
                grid_recon[src, tgt] = recon_adv[base + offset]
                grid_input[src, tgt] = adv_examples[base + offset]


def _save_grid_figure(grid, out_name):
    """Draw a 5-D image grid as a matrix of subplots and save it to disk.

    :param grid: array indexed as ``[column, row, height, width, channel]``
    :param out_name: file name handed to ``plt.savefig``
    """
    plt.ioff()
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    num_cols = grid.shape[0]
    num_rows = grid.shape[1]
    num_channels = grid.shape[4]
    for yy in range(num_rows):
        for xx in range(num_cols):
            figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
            plt.axis('off')
            if num_channels == 1:
                plt.imshow(grid[xx, yy, :, :, 0])
            else:
                plt.imshow(grid[xx, yy, :, :, :])

    plt.savefig(out_name)
    # The figure is only ever written to disk, so release it afterwards.
    plt.close(figure)


def cifar10_cw_latent(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      targeted=TARGETED,
                      num_threads=None,
                      label_smoothing=0.1,
                      nb_filters=NB_FILTERS):
    """
    Targeted Carlini-Wagner attack on a CIFAR10 autoencoder + classifier.

    Trains (or loads) a convolutional autoencoder and a CNN classifier that
    is fit on the autoencoder's reconstructions, crafts targeted adversarial
    examples with ``CarliniWagnerAE_Lat_Keras``, reports classifier scores and
    the mean L2 perturbation, and optionally saves visualization grids.
    Depending on module-level switches it then runs adversarial training of a
    second autoencoder and/or a binarization / mean-filtering defense.

    Module-level names read by this function: ``clean_train_ae``,
    ``clean_train_cl``, ``adv_train``, ``binarization_defense``,
    ``mean_filtering`` and ``CW_LEARNING_RATE``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: if true, build the image grids and save them as
                        figures
    :param nb_epochs: unused by this function
    :param batch_size: unused by this function
    :param source_samples: number of source examples; must equal the number
                           of classes because one source is used per class
    :param learning_rate: unused by this function
    :param attack_iterations: ``max_iterations`` passed to the attack
    :param targeted: must be true; only the targeted attack is implemented
    :param num_threads: if truthy, the TF session is configured with
                        ``intra_op_parallelism_threads=1``
    :param label_smoothing: unused by this function
    :param nb_filters: unused by this function
    :return: an AccuracyReport object (never filled in by this function)
    :raises ValueError: if ``targeted`` is false or ``source_samples`` differs
                        from the number of classes
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    set_log_level(logging.DEBUG)

    # Create the single TF session used by the attacks.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    try:
        # Get CIFAR10 data
        data = CIFAR10(train_start=train_start,
                       train_end=train_end,
                       test_start=test_start,
                       test_end=test_end)
        x_train, y_train = data.get_set('train')
        x_test, y_test = data.get_set('test')

        # Obtain Image Parameters
        img_rows, img_cols, nchannels = x_train.shape[1:4]
        nb_classes = y_train.shape[1]
        print("img_Rows, img_cols, nchannels: ", img_rows, img_cols,
              nchannels)

        # Fail before any training: everything below assumes one source
        # example per class and a target for every other class.
        if not targeted:
            raise ValueError(
                'cifar10_cw_latent only implements the targeted attack')
        if source_samples != nb_classes:
            raise ValueError(
                'source_samples ({}) must equal the number of classes ({})'
                .format(source_samples, nb_classes))

        save_dir = 'models'
        model_path_ae = os.path.join(save_dir, 'cifar10_AE')

        if clean_train_ae:
            model = _build_conv_autoencoder()
            model.fit(x_train,
                      x_train,
                      batch_size=128,
                      epochs=5,
                      verbose=1,
                      validation_data=(x_test, x_test),
                      shuffle=True)
            score = model.evaluate(x_test, x_test, verbose=1)
            print(score)
            model.save(model_path_ae)
            print('Saved trained model at %s ' % model_path_ae)
        else:
            model = load_model(model_path_ae)

        # Autoencoder reconstructions; the classifier is trained on these.
        x_lat_train = model.predict(x_train)
        x_lat_test = model.predict(x_test)

        model_path_cls = os.path.join(save_dir, 'cifar10_CNN_latent')

        if clean_train_cl:
            print("Training CNN AE")
            cl_model = Sequential()
            cl_model.add(
                Conv2D(32, (3, 3),
                       padding='same',
                       input_shape=x_train.shape[1:]))
            cl_model.add(Activation('relu'))
            cl_model.add(Conv2D(32, (3, 3)))
            cl_model.add(Activation('relu'))
            cl_model.add(MaxPooling2D(pool_size=(2, 2)))
            cl_model.add(Dropout(0.25))

            cl_model.add(Conv2D(64, (3, 3), padding='same'))
            cl_model.add(Activation('relu'))
            cl_model.add(Conv2D(64, (3, 3)))
            cl_model.add(Activation('relu'))
            cl_model.add(MaxPooling2D(pool_size=(2, 2)))
            cl_model.add(Dropout(0.25))

            cl_model.add(Flatten())
            cl_model.add(Dense(512))
            cl_model.add(Activation('relu'))
            cl_model.add(Dropout(0.5))
            cl_model.add(Dense(nb_classes))
            cl_model.add(Activation('softmax'))

            opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

            # Let's train the model using RMSprop
            cl_model.compile(loss='categorical_crossentropy',
                             optimizer=opt,
                             metrics=['accuracy'])

            # NOTE(review): training uses reconstructions (x_lat_train) but
            # validation uses the raw test images, while the final score
            # below uses x_lat_test -- confirm which is intended.
            cl_model.fit(x_lat_train,
                         y_train,
                         batch_size=90,
                         epochs=2,
                         validation_data=(x_test, y_test),
                         shuffle=True)

            cl_model.save(model_path_cls)
            print('Saved trained model at %s ' % model_path_cls)
        else:
            cl_model = load_model(model_path_cls)

        # Score trained model.
        scores = cl_model.evaluate(x_lat_test, y_test, verbose=1)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])

        #######################################################################
        # Craft adversarial examples using Carlini and Wagner's approach
        #######################################################################
        nb_adv_per_sample = str(nb_classes - 1)
        print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
              ' adversarial examples')
        print("This could take some time ...")

        # Instantiate a CW attack Object
        cw = CarliniWagnerAE_Lat_Keras(model, cl_model, sess=sess)

        # The attack inputs are needed whether or not the grids are drawn.
        (adv_inputs, adv_input_targets, adv_input_y,
         adv_target_y) = _build_targeted_pairs(x_test, y_test, nb_classes)

        if viz_enabled:
            # Initialize our arrays for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

        # One identity block (all one-hot targets) per source sample.
        adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32),
                         (source_samples, 1))

        cw_params = {
            'binary_search_steps': 4,
            'y_target': adv_ys,
            'max_iterations': attack_iterations,
            'learning_rate': CW_LEARNING_RATE,
            'batch_size': source_samples * (nb_classes - 1),
            'initial_const': 1
        }

        adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
        # NOTE(review): this treats the result of generate_np as something
        # the session can evaluate, while later results (adv_set, adv_2) are
        # used directly as arrays -- confirm what generate_np returns.
        adv = sess.run(adv)

        recon_orig = model.predict(adv_inputs)
        recon_adv = model.predict(adv)
        shape = np.shape(adv_inputs)
        noise = reduce_sum(np.square(adv_inputs - adv),
                           list(range(1, len(shape))))
        print("noise: ", noise)
        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)

        print('--------------------------------------')

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            percent_perturbed))

        if viz_enabled:
            _save_grid_figure(grid_viz_data, 'cifar10_fig1')
            _save_grid_figure(grid_viz_data_1, 'cifar10_fig2')

        # Adversarial training
        if adv_train:
            print("starting adversarial training")
            adv_input_set = []
            adv_input_target_set = []

            for _ in range(20):
                # Reshuffle so a different "first example of each class" is
                # picked on every round.
                indices = np.arange(np.shape(x_train)[0])
                np.random.shuffle(indices)
                print("indices: ", indices[1:10])
                x_train = x_train[indices]
                y_train = y_train[indices]

                adv_inputs_2, adv_input_targets_2, _, _ = (
                    _build_targeted_pairs(x_train, y_train, nb_classes))
                adv_input_set.append(adv_inputs_2)
                adv_input_target_set.append(adv_input_targets_2)

            adv_input_set = np.array(adv_input_set)
            adv_input_target_set = np.array(adv_input_target_set)
            print("shape of adv_input_set: ", np.shape(adv_input_set))
            print("shape of adv_input_target_set: ",
                  np.shape(adv_input_target_set))
            # Merge the (round, pair) axes into one batch axis.
            adv_input_set = adv_input_set.reshape(
                (-1,) + adv_input_set.shape[2:])
            adv_input_target_set = adv_input_target_set.reshape(
                (-1,) + adv_input_target_set.shape[2:])

            print("generated adversarial training set")

            adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                     **cw_params)

            # Inputs: clean + adversarial; targets: clean + clean sources.
            x_train_aim = np.append(x_train, adv_input_set, axis=0)
            x_train_app = np.append(x_train, adv_set, axis=0)

            model_path_ae_adv = os.path.join(save_dir, 'cifar10_AE_adv_lat')

            model2 = _build_conv_autoencoder()
            # No callbacks are passed, matching the first autoencoder fit:
            # the early-stopping / checkpoint callbacks are not defined in
            # this function.
            model2.fit(x_train_app,
                       x_train_aim,
                       batch_size=128,
                       epochs=20,
                       verbose=1,
                       validation_data=(x_test, x_test),
                       shuffle=True)
            score = model2.evaluate(x_test, x_test, verbose=1)
            print(score)
            model2.save(model_path_ae_adv)
            print('Saved adv trained model at ', model_path_ae_adv)

            cw2 = CarliniWagnerAE_Lat_Keras(model2, cl_model, sess=sess)

            adv_2 = cw2.generate_np(adv_inputs, adv_input_targets,
                                    **cw_params)

            # Reconstruct the examples crafted against the new autoencoder.
            recon_adv = model2.predict(adv_2)
            recon_orig = model2.predict(adv_inputs)

            noise = reduce_sum(tf.square(adv_inputs - adv_2),
                               list(range(1, len(shape))))
            print("noise: ", noise)

            scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
            scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
            print("classifier acc_target: ", scores2[1])
            print("classifier acc_true: ", scores1[1])

            if viz_enabled:
                _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                                recon_adv, adv_inputs, adv_2, nb_classes)

            print('--------------------------------------')

            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(
                np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
            print('Avg. L_2 norm of perturbations {0:.4f}'.format(
                percent_perturbed))

            if viz_enabled:
                _save_grid_figure(grid_viz_data, 'cifar10_fig1_adv_trained')
                _save_grid_figure(grid_viz_data_1,
                                  'cifar10_fig2_adv_trained')
                return report

        # Binarization / mean-filtering defense
        if binarization_defense or mean_filtering:
            if binarization_defense:
                adv[adv > 0.5] = 1.0
                adv[adv <= 0.5] = 0.0
            else:
                # Filter over the two spatial axes only; a scalar size would
                # also average across the batch and channel axes.
                adv = uniform_filter(adv, size=(1, 2, 2, 1))

            recon_orig = model.predict(adv_inputs)
            recon_adv = model.predict(adv)

            noise = reduce_sum(np.square(adv_inputs - adv),
                               list(range(1, len(shape))))
            print("noise: ", noise)

            scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
            scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
            print("classifier acc_target: ", scores2[1])
            print("classifier acc_true: ", scores1[1])

            if viz_enabled:
                _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                                recon_adv, adv_inputs, adv, nb_classes)
                _save_grid_figure(grid_viz_data, 'cifar10_fig1_bin')
                _save_grid_figure(grid_viz_data_1, 'cifar10_fig2_bin')
    finally:
        # Close TF session on every exit path, including errors.
        sess.close()

    return report
Esempio n. 6
0
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1, retrain=False,
                     source_samples=SOURCE_SAMPLES,
                     attack_iterations=ATTACK_ITERATIONS,
                     targeted=TARGETED):
  """
  Evaluate a Carlini-Wagner L2 attack against a series of CIFAR10 checkpoints.

  For every checkpoint number 50, 100, ..., 1000 a fresh graph and session
  are created, the model saved under that number is loaded (or trained and
  saved if no ``.meta`` file exists), the attack is run, and the resulting
  accuracy and distortion are printed and collected.

  Module-level names read by this function: ``save_dir``, ``filename``,
  ``CW_LEARNING_RATE`` and ``discretizeColor``.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: unused by this variant
  :param testing: unused by this variant
  :param backprop_through_attack: unused by this variant
  :param nb_filters: currently has no effect (the model is built with 64
                     filters, see the note in the body)
  :param num_threads: if truthy, sessions are configured with
                      ``intra_op_parallelism_threads=1``
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :param retrain: unused by this variant
  :param source_samples: number of test inputs to attack (targeted mode)
                         and attack batch size
  :param attack_iterations: ``max_iterations`` passed to the attack
  :param targeted: if true, craft one adversarial example per
                   (source, target class) pair
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Session configuration shared by every per-checkpoint session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # Checkpoint numbers to evaluate: 50, 100, ..., 1000
  save_file_nums = list(range(50, 1001, 50))

  distortion_arr = []
  accuracy_arr = []
  for save_file_num in save_file_nums:
    model_path = os.path.join(save_dir, filename + "-" + str(save_file_num))
    # Set TF random seed to improve reproducibility (the graph-level seed is
    # lost whenever the default graph is reset, so set it every iteration)
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")
    try:
      # Define input TF placeholder
      x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                            nchannels))
      y = tf.placeholder(tf.float32, shape=(None, nb_classes))
      # NOTE(review): the filter count is pinned to 64 here, so the
      # ``nb_filters`` argument has no effect; presumably it has to match the
      # saved checkpoints -- confirm before wiring the argument through.
      nb_filters = 64

      # Define TF model graph
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[img_rows, img_cols,
                                                 nchannels])
      preds = model.get_logits(x)
      loss = CrossEntropy(model, smoothing=label_smoothing)
      print("Defined TensorFlow model graph.")

      #########################################################################
      # Training the model using TensorFlow
      #########################################################################

      train_params = {
          'nb_epochs': nb_epochs,
          'batch_size': batch_size,
          'learning_rate': learning_rate,
          'filename': os.path.split(model_path)[-1]
      }

      rng = np.random.RandomState([2017, 8, 30])

      print("Trying to load trained model from: " + model_path)
      # check if we've trained before, and if we have, use that pre-trained
      # model
      if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
        print("Load trained model")
      else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

      #########################################################################
      # Craft adversarial examples using Carlini and Wagner's approach
      #########################################################################
      nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
      print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
            ' adversarial examples')
      print("This could take some time ...")

      # Instantiate a CW attack object
      cw = CarliniWagnerL2(model, sess=sess)

      cw_params = {'binary_search_steps': 1,
                   'max_iterations': attack_iterations,
                   'learning_rate': CW_LEARNING_RATE,
                   'batch_size': (source_samples * nb_classes
                                  if targeted else source_samples),
                   'initial_const': 10}

      eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
      if targeted:
        # Every source image is repeated once per target class.
        adv_inputs = np.array(
            [[instance] * nb_classes for
             instance in x_test[:source_samples]], dtype=np.float32)
        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))

        # Actually craft the examples: evaluating on ``None`` cannot work.
        cw_params["y_target"] = adv_ys
        adv_x = cw.generate_np(adv_inputs, **cw_params)

        # NOTE(review): assumes model_eval returns a scalar accuracy when it
        # is called without ``is_adv`` -- confirm against its definition.
        accuracy = model_eval(
            sess, x, y, preds, adv_x, adv_ys, args=eval_params)
        # Average L2 norm of the perturbations.
        distortion = np.mean(
            np.sum((adv_x - adv_inputs) ** 2, axis=(1, 2, 3)) ** .5)
      else:
        # Symbolic untargeted attack; model_eval runs it on the test set.
        adv2 = cw.generate(x, **cw_params)
        # NOTE(review): ``type`` resolves to the builtin unless the module
        # defines a global of that name -- confirm what model_eval expects.
        accuracy, distortion = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params, is_adv=True, ae=adv2,
                                          type=type, datasetName="CIFAR10", discretizeColor=discretizeColor)

      print('--------------------------------------')
      print("load save file: ", save_file_num)
      print('Test accuracy on examples: %0.4f ,distortion: %0.4f' % (accuracy, distortion))

      distortion_arr.append(distortion)
      accuracy_arr.append(accuracy)
    finally:
      # Close the session before resetting the graph: resetting the default
      # graph while a session is still active is not supported.
      sess.close()
      tf.reset_default_graph()

  print("accuracy:")
  for accuracy in accuracy_arr:
    print(accuracy)

  print("distortion:")
  for distortion in distortion_arr:
    print(distortion)

  return report
Esempio n. 7
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     model_path=MODEL_PATH,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """
    Continue training a CIFAR10 model from a stored checkpoint and save it.

    The CIFAR10 input pipeline and placeholders are always built. Only when
    ``clean_train`` is truthy is a model created with ``make_wresnet``,
    ``tf_model_load`` called on a hard-coded checkpoint path, ``train`` run
    with a fixed-rate ``MomentumOptimizer`` and the session saved to
    ``model_path``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs, forwarded in the training arguments
    :param batch_size: batch size for the input pipeline, training and eval
    :param model_path: where the session is saved after training; its last
                       path component is also forwarded as ``'filename'``
    :param learning_rate: forwarded in the training arguments (the optimizer
                          handed to ``train`` is built with a fixed 0.0008)
    :param clean_train: if falsy, nothing is trained and the empty report is
                        returned
    :param testing: if true, also record accuracy on the training set
    :param backprop_through_attack: not used by this function
    :param nb_filters: not used by this function
    :param num_threads: if truthy, the session is created with
                        ``intra_op_parallelism_threads=1`` (the value itself
                        is not forwarded)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Collects the accuracies measured below; this is the return value.
    report = AccuracyReport()

    # Fixed graph-level seed and verbose logging.
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    session_options = {'intra_op_parallelism_threads': 1} if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # CIFAR10 arrays plus an augmented, batched tf.data training pipeline.
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = (
        data.to_tensorflow()[0]
        .map(lambda image, label:
             (random_shift(random_horizontal_flip(image)), label), 4)
        .batch(batch_size)
        .prefetch(16)
    )
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Placeholder shapes are taken from the test arrays.
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Arguments for training the CIFAR10 model and for evaluation.
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate,
                        filename=os.path.basename(model_path))
    eval_params = dict(batch_size=batch_size)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Store the accuracy under ``report_key``; print unless is_adv is None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (kind, acc))

    if not clean_train:
        return report

    print('start')
    model = make_wresnet(scope='model1')
    preds = model.get_logits(x)
    # NOTE(review): `loss` is constructed but never handed to train() below.
    loss = CrossEntropy(model, smoothing=label_smoothing)
    tf_model_load(
        sess, '/nfs/nas4/data-hanwei/data-hanwei/DATA/models/wresnet/cifar1')

    def evaluate():
        """Callback handed to train(): clean test-set accuracy."""
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    # Momentum is 0 here, so only the fixed learning rate is in effect.
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.0008, momentum=0)
    train(sess, x, y, model, None, None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params(),
          optimizer=optimizer)
    tf.train.Saver().save(sess, model_path)

    # Optionally record accuracy on the training set as well.
    if testing:
        do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
def _load_attack_dataset(dataset, source_data_dir, train_start, train_end,
                         test_start, test_end, arch):
    """Return the dataset wrapper selected by ``dataset``.

    :raises ValueError: if ``dataset`` is not one of the handled names.
    """
    if dataset == "CIFAR10":
        return CIFAR10(data_dir=source_data_dir,
                       train_start=train_start,
                       train_end=train_end,
                       test_start=test_start,
                       test_end=test_end)
    if dataset in ("CIFAR100", "CIFAR100_coarse_label"):
        return CIFAR100(data_dir=source_data_dir,
                        dataset_name=dataset,
                        train_start=train_start,
                        train_end=train_end,
                        test_start=test_start,
                        test_end=test_end)
    if dataset in ("MNIST", "FashionMNIST"):
        return MNIST(data_dir=source_data_dir,
                     train_start=train_start,
                     train_end=train_end,
                     test_start=test_start,
                     test_end=test_end)
    # The two ImageNet variants are not given `test_end`.
    if dataset == "ImageNet":
        return MiniImageNet(data_dir=source_data_dir,
                            train_start=train_start,
                            train_end=train_end,
                            test_start=test_start,
                            num_classes=CLASS_NUM["ImageNet"],
                            arch=arch)
    if dataset == "TinyImageNet":
        return TinyImageNet(data_dir=source_data_dir,
                            train_start=train_start,
                            train_end=train_end,
                            test_start=test_start,
                            num_classes=CLASS_NUM["TinyImageNet"])
    raise ValueError("unsupported dataset: {!r}".format(dataset))


def _build_attack_model(arch, dataset, channels_key):
    """Return the classifier selected by ``arch``.

    ``dataset`` indexes ``IMG_SIZE`` / ``CLASS_NUM`` and ``channels_key``
    indexes ``DATASET_INCHANNELS``; they are kept separate because the caller
    passes its ``dataset`` argument for the former and ``args.dataset`` for
    the latter.

    :raises ValueError: if ``arch`` is not one of the handled names.
    """
    if arch == "conv4":
        model = Shallow4ConvLayersConv(
            arch,
            IMG_SIZE[dataset],
            CLASS_NUM[dataset],
            in_channels=DATASET_INCHANNELS[channels_key],
            dim_hidden=64)
        model.is_training = False
        return model
    if arch == "vgg16":
        # This model is constructed under the name "vgg_16", not `arch`.
        model = VGG16("vgg_16", CLASS_NUM[dataset], [
            IMG_SIZE[dataset], IMG_SIZE[dataset],
            DATASET_INCHANNELS[channels_key]
        ])
        model.is_training = False
        return model
    # The class names are resolved lazily, branch by branch, so a class that
    # is not imported only matters when its architecture is requested.
    if arch == "vgg16small":
        model_cls = VGG16Small
    elif arch == "resnet10":
        model_cls = ResNet10
    elif arch == "resnet18":
        model_cls = ResNet18
    elif arch == "resnet50":
        model_cls = ResNet50
    elif arch == "resnet101":
        model_cls = ResNet101
    else:
        raise ValueError("unsupported architecture: {!r}".format(arch))
    return model_cls(arch, CLASS_NUM[dataset], [
        IMG_SIZE[dataset], IMG_SIZE[dataset],
        DATASET_INCHANNELS[channels_key]
    ])


def _make_model_saver():
    """Return a Saver over the trainable variables plus every global variable
    whose name contains 'moving_mean' or 'moving_variance'."""
    var_list = tf.trainable_variables()
    global_vars = tf.global_variables()
    var_list += [v for v in global_vars if 'moving_mean' in v.name]
    var_list += [v for v in global_vars if 'moving_variance' in v.name]
    return tf.train.Saver(var_list=var_list)


def generate_adv_images(gpu,
                        attack_algo,
                        dataset,
                        source_data_dir,
                        train_start=0,
                        train_end=1000000,
                        test_start=0,
                        test_end=100000,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        learning_rate=0.001,
                        testing=False,
                        num_threads=None,
                        label_smoothing=0.1,
                        args=FLAGS):
    """
    Attack a clean classifier and save the adversarial train/test images.

    The classifier for ``args.arch`` is trained from scratch when its model
    folder holds no regular file, otherwise it is restored from that folder.
    The attack registered under ``attack_algo`` is then run over the train
    and the test set, and each result is written to
    ``<DATASET_ADV_OUTPUT[args.dataset]>/<args.arch>/<attack_algo>_untargeted_{train,test}.npz``.

    NOTE(review): both ``dataset`` and ``args.dataset`` are read (for dataset
    loading / model size and for paths / channel count respectively); they
    are presumably expected to name the same dataset - confirm with callers.
    NOTE(review): the TF session created here is not closed by this function.

    :param gpu: value for ``CUDA_VISIBLE_DEVICES`` (converted with ``str``)
    :param attack_algo: key into ``ATTACKERS`` and
                        ``UNTARGETED_ATTACKER_PARAM``
    :param dataset: "CIFAR10", "CIFAR100", "CIFAR100_coarse_label", "MNIST",
                    "FashionMNIST", "ImageNet" or "TinyImageNet"
    :param source_data_dir: directory handed to the dataset wrapper
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example (not forwarded for the
                     two ImageNet variants)
    :param nb_epochs: number of epochs to train model; also used as the
                      ``global_step`` of the saved checkpoint
    :param batch_size: batch size; replaced by the attack's own
                       ``"batch_size"`` entry when it has one, in which case
                       ``config.BATCH_SIZE`` is updated as well
    :param learning_rate: learning rate for training
    :param testing: if true and a checkpoint was restored, evaluate the clean
                    test accuracy
    :param num_threads: if truthy, the session is created with
                        ``intra_op_parallelism_threads=1``
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param args: object providing ``dataset`` and ``arch`` attributes
    :return: an AccuracyReport object
    :raises ValueError: for an unsupported ``dataset`` or ``args.arch``
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # Environment values must be strings; str() also accepts an integer id.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    attack_defaults = UNTARGETED_ATTACKER_PARAM[attack_algo]
    if "batch_size" in attack_defaults:
        batch_size = attack_defaults["batch_size"]
        config.BATCH_SIZE = batch_size

    output_dir = DATASET_ADV_OUTPUT[args.dataset] + "/" + args.arch
    os.makedirs(output_dir, exist_ok=True)
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    # Set logging level to see debug information
    set_log_level(logging.DEBUG)
    # Create TF session
    config_args = {}
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    config_args["gpu_options"] = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Load the data and build the augmented training pipeline.
    data = _load_attack_dataset(dataset, source_data_dir, train_start,
                                train_end, test_start, test_end, args.arch)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]
    # Input placeholders; the batch dimension is fixed to `batch_size`.
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}

    rng = np.random.RandomState([2017, 8, 30])

    def report_label(is_adv):
        """Map the tri-state flag to the word used in printed messages."""
        if is_adv is None:
            return None
        return 'adversarial' if is_adv else 'legitimate'

    def do_generate_eval(adv_x,
                         pred_adv_x,
                         x_set,
                         y_set,
                         report_key,
                         is_adv=None):
        """Run the attack graph over a data set and record its success rate."""
        (adv_images_total, adv_pred_total, gt_label_total,
         success_rate) = untargeted_advx_image_eval(
             sess, x, y, adv_x, pred_adv_x, x_set, y_set, args=eval_params)

        setattr(report, report_key, success_rate)
        report_text = report_label(is_adv)
        if report_text:
            print('adversarial attack successful rate on %s: %0.4f' %
                  (report_text, success_rate))
        return adv_images_total, adv_pred_total, gt_label_total, success_rate

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Record (and, when labelled, print) the accuracy of ``preds``."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        report_text = report_label(is_adv)
        # Only print when a label is available, as do_generate_eval does.
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    model = _build_attack_model(args.arch, dataset, args.dataset)

    def evaluate():
        """Clean test accuracy, measured with the model in inference mode."""
        if hasattr(model, "is_training"):
            model.is_training = False
        preds = model.get_logits(x)  # tf.tensor
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)
        if hasattr(model, "is_training"):
            model.is_training = True

    resume = TF_CLEAN_IMAGE_MODEL_PATH[args.dataset] + "/{0}".format(args.arch)
    os.makedirs(resume, exist_ok=True)
    print("using folder {} to store model".format(resume))
    resume_files = os.listdir(resume)
    loss = CrossEntropy(model, smoothing=label_smoothing)
    has_saved_files = any(
        os.path.isfile(resume + "/" + entry) for entry in resume_files)
    if not has_saved_files:  # clean train must be done!
        if hasattr(model, "is_training"):
            model.is_training = True

        # The saver is created before training, as in the original ordering.
        saver = _make_model_saver()
        train(sess,
              loss,
              None,
              None,
              model,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())  # train for nb_epochs epochs
        save_path = saver.save(sess,
                               "{}/model".format(resume),
                               global_step=nb_epochs)
        print("Model saved in path: %s" % save_path)
    else:
        if len(resume_files) == 1 and resume_files[0].endswith("ckpt"):
            # A single "*ckpt" entry is restored directly by its path.
            restore_path = resume + "/" + resume_files[0]
            loaded_from = restore_path
        else:
            # resume from old
            restore_path = tf.train.latest_checkpoint(resume)
            loaded_from = resume
        saver = _make_model_saver()
        saver.restore(sess, restore_path)
        print("load pretrained model {}".format(loaded_from))

        # Optionally measure clean test accuracy of the restored model.
        if testing:
            evaluate()
    if hasattr(model, "is_training"):
        model.is_training = False

    # Initialize the attack object and its graph.
    attacker = ATTACKERS[attack_algo](model, sess=sess)
    # Copy the shared defaults: the y_target tensor added below must not be
    # written back into the module-level UNTARGETED_ATTACKER_PARAM table.
    param_dict = dict(attack_defaults)

    if attack_algo in NEED_TARGETED_Y:
        y_target = look_for_target_otherthan_gt(y, CLASS_NUM[args.dataset])
        y_target = tf.reshape(y_target, (batch_size, -1))
        param_dict["y_target"] = y_target

    adv_x = attacker.generate(x, **param_dict)  # tensor
    preds_adv = model.get_logits(adv_x)

    # Generate and store adversarial examples for the train set, then the
    # test set.
    for split, x_set, y_set, report_key in (
            ("train", x_train, y_train, "clean_train_adv_eval"),
            ("test", x_test, y_test, "clean_test_adv_eval")):
        (adv_images_total, adv_pred_total, gt_label_total,
         success_rate) = do_generate_eval(adv_x, preds_adv, x_set, y_set,
                                          report_key, True)
        np.savez(output_dir +
                 "/{0}_untargeted_{1}.npz".format(attack_algo, split),
                 adv_images=adv_images_total,
                 adv_pred=adv_pred_total,
                 gt_label=gt_label_total,
                 attack_success_rate=success_rate)
    print('generate {} adversarial image done'.format(attack_algo))

    return report
Esempio n. 9
0
def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS,
                     filename=FILENAME,
                     train_dir_ae=TRAIN_DIR_AE,
                     train_dir_cl=TRAIN_DIR_CL):

    # Object used to keep track of (and return) key accuracies

    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    rng = np.random.RandomState()

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    nb_latent_size = 100
    # Get MNIST test data
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

    #model_vae= vae_model(x, img_rows=img_rows, img_cols=img_cols,
    #                 channels=nchannels)

    wrap_vae = ModelVAE('wrap_vae')
    recon = wrap_vae.get_layer(x, 'RECON')
    #print("recon: ",recon)
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': 128}
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
            sess, x, x_t, recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_ae,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_ae):
        os.mkdir(train_dir_ae)

    #ckpt = tf.train.get_checkpoint_state(train_dir_ae)
    #print(train_dir_ae, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    #wrap_vae = KerasModelWrapper(model_vae)
    latent_dim = 20
    intermediate_dim = 128

    #train_ae(sess, global_loss, x_train, x_train, evaluate = evaluate_ae, args = train_params, rng = rng, var_list=wrap_vae.get_params())

    if clean_train_vae == True:
        print("Training VAE")
        loss = vae_loss(wrap_vae)
        train_ae(sess,
                 loss,
                 x_train,
                 x_train,
                 evaluate=evaluate_ae,
                 args=train_params,
                 rng=rng,
                 var_list=wrap_vae.get_params())
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_vae.ckpt")
        print("saved model")

    else:
        print("Loading VAE")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_vae.ckpt")
        evaluate_ae()
        if (train_further):
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'train_dir': train_dir_ae,
                'filename': filename
            }
            #training with the saved model as starting point
            loss = SquaredError(wrap_vae)
            train_ae(sess,
                     loss,
                     x_train,
                     x_train,
                     evaluate=evaluate_vae,
                     args=train_params,
                     rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_ae_final.ckpt")

            evaluate_ae()

            print("Model loaded and trained for more epochs")

    num_classes = 10
    '''
  save_dir= 'models'
  model_name = 'cifar10_CNN.h5'
  model_path_cls = os.path.join(save_dir, model_name)
  '''
    cl_model = cnn_cl_model(img_rows=img_rows,
                            img_cols=img_cols,
                            channels=nchannels,
                            nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def do_eval_cls(preds, x_set, y_set, x_tar_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_t,
                         x_set,
                         y_set,
                         x_tar_set,
                         args=eval_params_cls)

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_cl,
                         x_t,
                         x_test,
                         y_test,
                         x_test,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': 3,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_cl):
        os.mkdir(train_dir_cl)

    #ckpt = tf.train.get_checkpoint_state(train_dir_cl)
    #print(train_dir_cl, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap_cl = KerasModelWrapper(cl_model)

    if clean_train_cl == True:
        print("Training CNN Classifier")
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        train(sess,
              loss_cl,
              x_train,
              y_train,
              evaluate=evaluate,
              optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                  decay=1e-6),
              args=train_params,
              rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_cnn_cl_vae.ckpt")
        print("saved model at ", "train_dir/model_cnn_cl.ckpt")

    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_cnn_cl_vae.ckpt")
        print("Model loaded")
        evaluate()

        # Score trained model.
    '''
  scores = cl_model.evaluate(x_test, y_test, verbose=1)
  print('Test loss:', scores[0])
  print('Test accuracy:', scores[1])
  cl_model_wrap = KerasModelWrapper(cl_model)
` '''
    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(wrap_vae, wrap_cl, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * (nb_classes - 1)
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)

            #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])

            adv_input_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes - 1):
                    targ.append(y_test[idxs[curr_num]])
                adv_input_y.append(targ)

            adv_input_y = np.array(adv_input_y)

            adv_target_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(y_test[idxs[id]])
                adv_target_y.append(targ)

            adv_target_y = np.array(adv_target_y)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

            adv_input_targets = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_test[idxs[id]])
                adv_input_targets.append(targ)
            adv_input_targets = np.array(adv_input_targets)

            adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1),
                                             img_rows, img_cols, nchannels))
            adv_input_targets = adv_input_targets.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_y = adv_input_y.reshape(
                source_samples * (nb_classes - 1), 10)
            adv_target_y = adv_target_y.reshape(
                source_samples * (nb_classes - 1), 10)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))
    yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    #adv = sess.run(adv)

    #print("layer names: \n", wrap_vae.get_layer_names())
    recon_orig = wrap_vae.get_layer(x, 'RECON')
    recon_orig = sess.run(recon_orig, feed_dict={x: adv_inputs})
    recon_adv = wrap_vae.get_layer(x, 'RECON')
    recon_adv = sess.run(recon_adv, feed_dict={x: adv})
    pred_adv_recon = wrap_cl.get_logits(x)
    pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

    #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    #scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose = 1)
    #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
    #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
    shape = np.shape(adv_inputs)
    noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
    noise = pow(noise, 0.5)
    d1 = np.sum(np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
    d2 = np.sum(
        np.square(recon_adv - adv_input_targets)) / (np.shape(adv_inputs)[0])
    acc_1 = (sum(
        np.argmax(pred_adv_recon, axis=-1) == np.argmax(adv_target_y, axis=-1))
             ) / (np.shape(adv_target_y)[0])
    acc_2 = (sum(
        np.argmax(pred_adv_recon, axis=-1) == np.argmax(adv_input_y, axis=-1))
             ) / (np.shape(adv_target_y)[0])
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)

    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        #rint(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
    # Finally, block & display a grid of all the adversarial examples

    if viz_enabled:

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2')

    #return report

    #adversarial training
    if (adv_train == True):

        print("starting adversarial training")
        #sess1 = tf.Session()
        adv_input_set = []
        adv_input_target_set = []

        for i in range(20):

            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array([[instance] * (nb_classes - 1)
                                     for instance in x_train[idxs]],
                                    dtype=np.float32)
            adv_input_targets_2 = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_train[idxs[id]])
                adv_input_targets_2.append(targ)
            adv_input_targets_2 = np.array(adv_input_targets_2)

            adv_inputs_2 = adv_inputs_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))
            adv_input_targets_2 = adv_input_targets_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        adv_input_set = np.array(adv_input_set),
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        adv_input_set = np.reshape(
            adv_input_set,
            (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] *
             np.shape(adv_input_set)[2], np.shape(adv_input_set)[3],
             np.shape(adv_input_set)[4], np.shape(adv_input_set)[5]))
        adv_input_target_set = np.reshape(adv_input_target_set,
                                          (np.shape(adv_input_target_set)[0] *
                                           np.shape(adv_input_target_set)[1],
                                           np.shape(adv_input_target_set)[2],
                                           np.shape(adv_input_target_set)[3],
                                           np.shape(adv_input_target_set)[4]))

        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        #model_name = 'cifar10_AE_adv.h5'
        #model_path_ae = os.path.join(save_dir, model_name)

        model_ae_adv = ae_model(x,
                                img_rows=img_rows,
                                img_cols=img_cols,
                                channels=nchannels)
        recon = model_ae_adv(x)
        wrap_vae_adv = KerasModelWrapper(model_ae_adv)
        #print("recon: ",recon)
        #print("Defined TensorFlow model graph.")

        print("Training Adversarial AE")
        loss = SquaredError(wrap_vae_adv)
        train_ae(sess,
                 loss_2,
                 x_train_app,
                 x_train_aim,
                 evaluate=evaluate_ae,
                 args=train_params,
                 rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae_adv.ckpt")
        print("saved model")

        cw2 = CarliniWagnerAE(wrap_vae_adv, wrap_cl, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        recon_adv = wrap_vae_adv.get_layer(x, 'RECON')
        recon_orig = wrap_vae_adv.get_layer(x, 'RECON')
        recon_adv = sess.run(recon_adv, {x: adv_2})
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        if targeted:

            noise = reduce_sum(tf.square(adv_inputs - adv_2),
                               list(range(1, len(shape))))
            print("noise: ", noise)
        pred_adv_recon = cl_model.get_layer(recon_adv)
        #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        #scores2 = cl_model.eval_params(recon_adv, adv_target_y, verbose = 1)
        #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
        #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
        noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
        noise = pow(noise, 0.5)
        d1 = np.sum(
            np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv -
                              adv_input_targets)) / (np.shape(adv_inputs)[0])
        acc_1 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
        acc_2 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)

        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i,
                                            j] = adv_2[i * (nb_classes - 1) +
                                                       j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i, j] = adv_2[i *
                                                          (nb_classes - 1) + j]

            #rint(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully found

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print(
            'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

        # Close TF session
        #sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if viz_enabled:
            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')

            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')

                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig1_vae_adv_trained')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig2_vae_adv_trained')

            #return report


#binarization defense
#if(binarization_defense == True or mean_filtering==True):
    if (binarization_defense == True):

        print("BINARIZATION")
        print("---------------------------")
        adv[adv > 0.5] = 1.0
        adv[adv <= 0.5] = 0.0

        recon_orig = wrap_vae.get_layer(x, 'RECON')
        recon_adv = wrap_vae.get_layer(x, 'RECON')
        #pred_adv = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        #pred_adv = sess.run(pred_adv, {x: recon_adv})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        eval_params = {'batch_size': 90}
        if targeted:

            noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
            noise = pow(noise, 0.5)
            d1 = np.sum(
                np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
            d2 = np.sum(np.square(recon_adv - adv_input_targets)) / (
                np.shape(adv_inputs)[0])
            acc_1 = (sum(
                np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                    adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
            acc_2 = (sum(
                np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                    adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
            print("noise: ", noise)
            print("d1: ", d1)
            print("d2: ", d2)
            print("classifier acc_target: ", acc_1)
            print("classifier acc_true: ", acc_2)

        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1_bin')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2_bin')

    if (mean_filtering == True):

        print("MEAN FILTERING")
        print("---------------------------")
        adv = uniform_filter(adv, 2)

        recon_orig = wrap_vae.get_layer(x, 'RECON')
        recon_adv = wrap_vae.get_layer(x, 'RECON')
        pred_adv_recon = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        eval_params = {'batch_size': 90}

        noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
        noise = pow(noise, 0.5)
        d1 = np.sum(
            np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv -
                              adv_input_targets)) / (np.shape(adv_inputs)[0])
        acc_1 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
        acc_2 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)

        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1_mean')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2_mean')
Esempio n. 10
0
def train_deflecting(dataset_name=DATASET, train_start=0, train_end=TRAIN_END, test_start=0,
                     test_end=TEST_END, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                     num_capsules_output=NUM_CAPSULES_OUTPUT,
                     output_atoms=OUTPUT_ATOMS,
                     num_routing=NUM_ROUTING,
                     learning_rate=LEARNING_RATE,
                     nb_filters=NB_FILTERS, num_threads=None):
  """
  Train a deflecting model based on CapsLayer (CapsNetRecons) on SVHN or CIFAR10.

  :param dataset_name: which dataset to train on, either "SVHN" or "CIFAR10"
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param num_capsules_output: number of output capsules
  :param output_atoms: size of each capsule vector
  :param num_routing: number of routings in capsule layer
  :param learning_rate: learning rate for training
  :param nb_filters: filter count forwarded to the CapsNetRecons constructor
  :param num_threads: when truthy, the session is created with
                      intra_op_parallelism_threads=1 (the numeric value
                      itself is not used); otherwise TF defaults apply
  :raises ValueError: if dataset_name is neither "SVHN" nor "CIFAR10"
  """

  # Fail fast on an unsupported dataset: previously neither branch below
  # matched and the function crashed later with an unbound `data` variable,
  # after a TF session had already been created.
  if dataset_name not in ("SVHN", "CIFAR10"):
    raise ValueError(
        "Unsupported dataset_name %r; expected 'SVHN' or 'CIFAR10'"
        % (dataset_name,))

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  if num_threads:
    # NOTE: the thread count is pinned to 1 whatever value num_threads holds.
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Pick the dataset class and the matching training-time augmentation.
  if dataset_name == "SVHN":
    dataset_cls = SVHN
    augment = random_shift
  else:
    dataset_cls = CIFAR10

    def augment(image):
      # CIFAR10 additionally gets a random horizontal flip before the shift.
      return random_shift(random_horizontal_flip(image))

  data = dataset_cls(train_start=train_start, train_end=train_end,
                     test_start=test_start, test_end=test_end)
  dataset_size = data.x_train.shape[0]
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda image, label: (augment(image), label), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}

  rng = np.random.RandomState([2017, 8, 30])

  model = CapsNetRecons(dataset_name, nb_classes, nb_filters,
                        input_shape=[batch_size, img_rows, img_cols, nchannels],
                        num_capsules_output=num_capsules_output,
                        output_atoms=output_atoms,
                        num_routing=num_routing)
  # Only the trainable variables under the scope named after the dataset are
  # handed to the trainer.
  var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=dataset_name)

  preds = model.get_logits(x)
  loss = MarginCycLoss(model)

  def evaluate():
    """Print and return the accuracy of the model on the clean test set."""
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    print('Test accuracy on %s examples: %0.4f' % ("clean", acc))
    return acc

  train(sess, loss, None, None,
        dataset_train=dataset_train, dataset_size=dataset_size,
        evaluate=evaluate, args=train_params, rng=rng,
        var_list=var_lists)
Esempio n. 11
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1,
                     retrain=False):
    """
  CIFAR10 tutorial: load (or train) a classifier and evaluate it on
  color-shifted test examples.

  For each checkpoint number in the schedule built below, the function
  tries to load a saved model; if no checkpoint exists it trains one with
  train_with_noise. It then runs color_shift_attack on the test set and
  records the accuracy and distortion reported by model_eval.

  Names such as save_dir, filename, save, type, discretizeColor and
  num_trials are neither parameters nor locals of this function; they are
  looked up outside of it (presumably module-level settings -- confirm).

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training and evaluation batches
  :param learning_rate: learning rate for training
  :param clean_train: if falsy, nothing is trained or evaluated and the
                      empty report is returned
  :param testing: not used by this function body
  :param backprop_through_attack: not used by this function body
  :param nb_filters: filter count passed to ModelAllConvolutional
  :param num_threads: when truthy, the session is created with
                      intra_op_parallelism_threads=1
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :param retrain: forwarded to train_with_noise; reset to False once a
                  training call has completed
  :return: an AccuracyReport object
  """

    # Accumulates the key accuracies and is handed back to the caller.
    report = AccuracyReport()

    # Fixed graph-level seed for reproducibility.
    tf.set_random_seed(1234)

    # Verbose logging so debug information is visible.
    set_log_level(logging.DEBUG)

    # Build the TF session, optionally restricted to a single intra-op thread.
    config_args = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Load CIFAR10 and assemble the augmented tf.data input pipeline.
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = (
        data.to_tensorflow()[0]
        .map(lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
        .batch(batch_size)
        .prefetch(16))
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Image geometry and class count are read off the test set.
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Input / label placeholders.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Hyper-parameters for training the CIFAR10 model.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
    }
    eval_params = {'batch_size': batch_size}
    # Only referenced by the (currently disabled) gradient-based attacks.
    fgsm_params = {'eps': 8 / 255, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                ae=None,
                type=None,
                datasetName=None,
                discretizeColor=1):
        """Evaluate preds on (x_set, y_set) and store the accuracy on report.

        Returns the (accuracy, distortion) pair produced by model_eval. A
        summary line is printed unless is_adv is None.
        """
        accuracy, distortion = model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          x_set,
                                          y_set,
                                          args=eval_params,
                                          is_adv=is_adv,
                                          ae=ae,
                                          type=type,
                                          datasetName=datasetName,
                                          discretizeColor=discretizeColor)
        setattr(report, report_key, accuracy)
        if is_adv is not None:
            example_kind = 'adversarial' if is_adv else 'legitimate'
            print('Test accuracy on %s examples: %0.4f' %
                  (example_kind, accuracy))

        return accuracy, distortion

    # Nothing else to do when clean training is switched off.
    if not clean_train:
        return report

    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
        """Callback for the trainer: score the model on the clean test set."""
        do_eval(preds,
                x_test,
                y_test,
                'clean_train_clean_eval',
                False,
                type=type,
                datasetName="CIFAR10",
                discretizeColor=discretizeColor)

    # Checkpoint numbers to visit: multiples of the stride, stopping with the
    # first one that reaches 1000 (with the current stride that is just 1000).
    checkpoint_steps = []
    step_stride = 1000
    last_step = 0
    while last_step < 1000:
        last_step += step_stride
        checkpoint_steps.append(last_step)

    distortions = []
    accuracies = []
    for checkpoint_step in checkpoint_steps:
        model_path = os.path.join(save_dir,
                                  filename + "-" + str(checkpoint_step))

        print("Trying to load trained model from: " + model_path)
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
            print("Load trained model")
        else:
            train_with_noise(sess,
                             loss,
                             x_train,
                             y_train,
                             evaluate=evaluate,
                             args=train_params,
                             rng=rng,
                             var_list=model.get_params(),
                             save=save,
                             type=type,
                             datasetName="CIFAR10",
                             retrain=retrain,
                             discretizeColor=discretizeColor)
            # Later iterations must not request another retrain.
            retrain = False

        # Generate semantic (color-shift) adversarial examples from copies of
        # the test data.
        adv_x, y_test2 = color_shift_attack(sess,
                                            x,
                                            y,
                                            np.copy(x_test),
                                            np.copy(y_test),
                                            preds,
                                            args=eval_params,
                                            num_trials=num_trials)
        x_test2 = adv_x

        # The shifted examples are scored under the same report key and with
        # is_adv=False, exactly like the clean evaluation callback above.
        accuracy, distortion = do_eval(preds,
                                       np.copy(x_test2),
                                       np.copy(y_test2),
                                       'clean_train_clean_eval',
                                       False,
                                       type=type,
                                       datasetName="CIFAR10",
                                       discretizeColor=discretizeColor)

        distortions.append(distortion)
        accuracies.append(accuracy)
        print(accuracy)
        print(distortion)

    # Final summary: one value per visited checkpoint.
    print("accuracy:")
    for accuracy in accuracies:
        print(accuracy)

    print("distortion:")
    for distortion in distortions:
        print(distortion)

    return report
Esempio n. 12
0
def init_defense(sess, x, preds, batch_size, multi_noise=False):
    """
    Build and prime the statistics-based detector returned by
    ``tf_robustify.collect_statistics``.

    The CIFAR10 training set is loaded and multiplied by 255, the op
    ancestry of ``preds`` is walked back to the first ``MatMul`` op to
    obtain the latent tensor and the final weights, and the first 1000
    training examples are handed to ``collect_statistics``.

    :param sess: TF session, forwarded to ``collect_statistics``
    :param x: tensor forwarded to ``collect_statistics`` (presumably the
              model's input placeholder -- confirm against the caller)
    :param preds: logits tensor; following ``inputs[0]`` from its op must
                  reach an op of type ``MatMul``
    :param batch_size: batch size forwarded to ``collect_statistics``
    :param multi_noise: if True, detection uses a list of nine noise
                        settings (magnitudes scaled by 255) instead of
                        the single setting ``'n30.0'``
    :return: the object returned by ``collect_statistics``, after it has
             been advanced once with ``next``
    """
    data = CIFAR10()
    x_train, y_train = data.get_set('train')
    # In-place rescale; the attack clip range used below is 0..255.
    # NOTE(review): presumably the loaded data is in [0, 1] -- confirm.
    x_train *= 255

    nb_classes = y_train.shape[1]

    n_collect = 1000
    p_ratio_cutoff = .999
    just_detect = True
    clip_alignments = True
    fit_classifier = True
    noise_eps = 'n30.0'
    num_noise_samples = 256

    if multi_noise:
        noises = 'n0.003,s0.003,u0.003,n0.005,s0.005,u0.005,s0.008,n0.008,u0.008'.split(
            ',')
        # Each entry is a one-letter tag followed by a magnitude; only the
        # magnitude is rescaled by 255, the tag is kept as-is.
        noise_eps_detect = [n[0] + str(float(n[1:]) * 255) for n in noises]
    else:
        noise_eps_detect = 'n30.0'

    # these attack parameters are just for initializing the defense
    eps = 8.0
    pgd_params = {
        'eps': eps,
        'eps_iter': (eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }

    # Walk back along the first input of each op until the MatMul that
    # produces the logits; its two inputs are the latent activations and
    # the weight matrix. An op without inputs ends the walk with an error.
    logits_op = preds.op
    while logits_op.type != 'MatMul':
        logits_op = logits_op.inputs[0].op
    latent_x_tensor, weights = logits_op.inputs
    logits_tensor = preds

    predictor = tf_robustify.collect_statistics(
        x_train[:n_collect],
        y_train[:n_collect],
        x,
        sess,
        logits_tensor=logits_tensor,
        latent_x_tensor=latent_x_tensor,
        weights=weights,
        nb_classes=nb_classes,
        p_ratio_cutoff=p_ratio_cutoff,
        noise_eps=noise_eps,
        noise_eps_detect=noise_eps_detect,
        pgd_eps=pgd_params['eps'],
        # step size expressed as a fraction of eps
        pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
        pgd_iters=pgd_params['nb_iter'],
        save_alignments_dir=None,
        load_alignments_dir=None,
        clip_min=pgd_params['clip_min'],
        clip_max=pgd_params['clip_max'],
        batch_size=batch_size,
        num_noise_samples=num_noise_samples,
        debug_dict=None,
        debug=False,
        targeted=False,
        pgd_train=None,
        fit_classifier=fit_classifier,
        clip_alignments=clip_alignments,
        just_detect=just_detect)

    # The result is consumed with next(); advance it once before handing
    # it back to the caller.
    next(predictor)
    return predictor
Esempio n. 13
0
def cifar10_train_on_untargeted(train_start=0, train_end=60000, test_start=0,
                               test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                               learning_rate=LEARNING_RATE,
                               testing=True, adv_training=False,
                               backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                               num_threads=None,threat_model='white_box',
                               model_key='model_1_a',attacker_key='clean',
                               label_smoothing=0.1):
  """
  Evaluate a saved CIFAR10 Keras model against one attacker and optionally
  train it on that attacker's adversarial examples.

  The model file and the attack configuration are looked up in the
  metadata returned by ``read_from_meta()`` under ``model_key`` and
  ``attacker_key``. With ``adv_training`` the training budget is spent in
  up to 10 chunks; after every chunk the model is saved, a new metadata
  entry is written and the learning rate is adapted.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: total number of epochs for adversarial training
  :param batch_size: size of training and evaluation batches
  :param learning_rate: initial learning rate for training
  :param testing: if True and ``adv_training`` is False, record the
                  accuracies on the training set in the report
  :param adv_training: if True, perform adversarial training
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param num_threads: if truthy, the session is created with
                      ``intra_op_parallelism_threads=1``
  :param threat_model: 'white_box' (requested training range),
                       'black_box_A' (range cut at ``train_end/2``) or
                       'black_box_B' (range started at ``train_end/2``)
  :param model_key: key of the model entry in the metadata
  :param attacker_key: key of the attacker entry in the metadata
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  :raises NotImplementedError: if ``threat_model`` is not one of the above
  :raises ValueError: if the attacker's ``attack_type`` is not supported
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  K.set_learning_phase(0)

  ## Create TF session and set as Keras backend session
  K.set_session(sess)

  # Look up the model and attacker descriptions
  meta = read_from_meta()
  attacker_meta = meta['attacker'][attacker_key]
  model_meta = meta['model'][model_key]
  attack_type = attacker_meta['attack_type']

  if threat_model == 'black_box_A':
    print('Using training set A')
    train_end = int(train_end/2)
    assert 'black_box_A' in meta['model'][model_key]['threat_models']
    dataset_section = 'A'
  elif threat_model == 'black_box_B':
    print('Using training set B')
    train_start = int(train_end/2)
    dataset_section = 'B'
    assert 'black_box_B' in meta['model'][model_key]['threat_models']
  elif threat_model == 'white_box':
    print('Using full training set')
    dataset_section = ''
  else:
    raise NotImplementedError

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  dataset_size = data.x_train.shape[0]
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Attacker-specific parameters override the defaults from the metadata
  attack_params = {}
  attack_params.update(meta['attacker']['default']['attack_params'])
  attack_params.update(attacker_meta['attack_params'])
  for k,v in attack_params.items():
    if isinstance(v,str):
      # SECURITY: eval() executes arbitrary code stored in the metadata;
      # the metadata must come from a trusted source.
      attack_params[k] = eval(v)
  if 'meta_key' in attacker_meta and attack_type == 'advgan':
    folderpath = meta['advgan'][attacker_meta['meta_key']]['train_params']['output_folder']
    attack_params.update({'generator_filepath':os.path.join(folderpath,'generator.hd5')})

  model_filename = model_meta['file_name']
  if 'black_box' in threat_model:
    model_filename = model_filename.replace('cifar10','cifar10B')
  model_filepath=model_meta['folder_path']+'/'+model_filename

  keras_model=tf.keras.models.load_model(
    filepath=model_filepath,
    custom_objects=custom_object())
  model = KerasModelWrapper(keras_model)

  def attack_statistics(x_true,x_adv):
    # calculate average L1,L2,Linf norms
    # as well as % of pixels modified
    L1 = tf.reduce_mean(K.sum(K.abs(x_adv-x_true),axis=(-1,-2,-3)))
    L2 = tf.reduce_mean(K.sqrt(K.sum(K.square(x_adv-x_true),axis=(-1,-2,-3))))

    Linf = tf.reduce_mean(K.max(K.abs(x_true-x_adv),axis=(-1,-2,-3)))
    # a value counts as modified when it moved by more than 1/255
    eps = tf.constant(1/255,shape=x_true.shape.as_list()[1:])
    mod_perc = 100*tf.reduce_mean(K.cast(K.greater(K.abs(x_true-x_adv),eps),dtype='float'))
    return {'L1':L1,'L2':L2,'Linf':Linf,'%pix':mod_perc}

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    # Store the accuracy of `preds` on (x_set, y_set) under report_key
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  def report_as_dict():
    # Float-valued report attributes as truncated strings, for json
    # serializability. NumPy floats are accepted as well as plain floats
    # so that both call sites below agree on what is included.
    float_attrs = {}
    for attr in dir(report):
      value = getattr(report, attr)
      if isinstance(value, (float, np.floating)):
        float_attrs[attr] = str(value)[:10]
    return float_attrs

  #define attacker
  if attack_type == 'cwl2':
    from cleverhans.attacks import CarliniWagnerL2
    attacker = CarliniWagnerL2(model, sess=sess)
  elif attack_type == 'fgsm':
    from cleverhans.attacks import FastGradientMethod
    attacker = FastGradientMethod(model, sess=sess)
  elif attack_type == 'pgd':
    from cleverhans.attacks import MadryEtAl
    attacker = MadryEtAl(model, sess=sess)
  elif attack_type == 'advgan':
    from cleverhans.attacks.adversarial_gan import AdvGAN
    attacker = AdvGAN(model,sess=sess)
  elif attack_type is None or attack_type=='clean':
    attacker = None
  else:
    # Fail here instead of leaving `attacker` unbound for attack() below
    raise ValueError('{} is not a valid attack type'.format(attack_type))

  def attack(x):
    if attacker:
      print('attack_params',attack_params)
      return attacker.generate(x,**attack_params)
    else:
      return x
  loss = CrossEntropy(model, smoothing=label_smoothing, attack=attack)
  preds = model.get_logits(x)
  adv_x = attack(x)

  if not backprop_through_attack:
    # For the fgsm attack used in this tutorial, the attack has zero
    # gradient so enabling this flag does not change the gradient.
    # For some other attacks, enabling this flag increases the cost of
    # training, but gives the defender the ability to anticipate how
    # the attacker will change their strategy in response to updates to
    # the defender's parameters.
    adv_x = tf.stop_gradient(adv_x)
  preds_adv = model.get_logits(adv_x)

  def evaluate():
    # Accuracy of adversarially trained model on legitimate test inputs
    do_eval(preds, x_test, y_test, 'adv_train_clean_eval', False)
    # Accuracy of the adversarially trained model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'adv_train_adv_eval', True)

  #print_attack info
  with sess.as_default():
    # format() so that an attack_type of None does not break the print
    print('attack type: {}'.format(attack_type))
    attack_stats = attack_statistics(x,adv_x)
    feed_dict={x:x_test[:batch_size],y:y_test[:batch_size]}
    attack_stats_eval = sess.run(attack_stats,feed_dict=feed_dict)
    attack_stats_eval = {k:str(v)[:10] for k,v in attack_stats_eval.items()}
    print(attack_stats_eval)

  if adv_training:
    # Train an CIFAR10 model
    reeval_breaks = 10
    train_params = {
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # `rng` was previously not defined in this function; the seed matches
    # the one used by the other tutorials in this file.
    # NOTE(review): if a module-level `rng` was relied on, confirm the seed.
    rng = np.random.RandomState([2017, 8, 30])
    nb_e = nb_epochs  # epochs still to be trained
    prev_acc = 0
    # Perform and evaluate adversarial training
    for rb in range(reeval_breaks,0,-1):
      # Spread the remaining epochs over the remaining chunks, never
      # exceeding what is left.
      train_params['nb_epochs'] = min(int(np.ceil(nb_e/rb)), nb_e)
      print("Starting training {} of {}".format(nb_epochs-nb_e, nb_epochs))
      train(sess, loss, None, None,
          dataset_train=dataset_train, dataset_size=dataset_size,
          evaluate=evaluate, args=train_params, rng=rng)

      nb_e-=train_params['nb_epochs']

      #put accuracies in dictionary for json serializability
      report_dict = report_as_dict()
      print(report_dict)
      #save to meta
      new_meta = read_from_meta()
      new_model = deepcopy(model_meta)
      new_model.update({'adv_training':True,
                        'attacker_key':attacker_key,
                        'parent_key':model_key,
                        'threat_models':[threat_model],
                        'attack_stats':attack_stats_eval,
                        'report':report_dict,
                        'train_params': {
                          'batch_size': batch_size,
                          'learning_rate': learning_rate,
                          'nb_epochs': nb_epochs-nb_e,
                        },
                        'reeval':False
                       })
      if nb_e > 0:
        new_model.update({'training_finished':False,
          'file_name': model_meta['file_name'].replace('clean',attacker_key+'_train_epoch_'+str(new_model['train_params']['nb_epochs']))})
      else:
        new_model.update({'training_finished':True,
          'file_name': model_meta['file_name'].replace('clean',attacker_key+'_train')})

      # NOTE(review): the key is derived from the metadata snapshot read at
      # the start of the function, not from new_meta -- confirm intended.
      new_model_key = get_new_key(model_key,meta)
      new_meta['model'].update({new_model_key:new_model})
      write_to_meta(new_meta)

      save_filename = new_model['file_name']
      if 'black_box' in threat_model:
        save_filename = save_filename.replace('cifar10','cifar10'+dataset_section)
      save_model(keras_model,filepath=new_model['folder_path']+'/'+save_filename)

      if report.adv_train_adv_eval >= 0.9:
        break
      elif report.adv_train_adv_eval <= 0.01:
        #increase_lr
        lr = train_params['learning_rate']
        train_params.update({'learning_rate':lr*1.5})
        print('no learning! Increasing learning rate to {}'
          .format(train_params['learning_rate']))

      elif prev_acc<=report.adv_train_adv_eval:
        #update_lr
        lr = train_params['learning_rate']
        train_params.update({'learning_rate':lr*0.8})
        print('decreasing learning rate to {}'
          .format(train_params['learning_rate']))
      prev_acc = report.adv_train_adv_eval

      if nb_e<=0:
        break

  elif testing:
    # Calculate training errors
    do_eval(preds, x_train, y_train, 'train_adv_train_clean_eval')
    do_eval(preds_adv, x_train, y_train, 'train_adv_train_adv_eval')
    report_dict = report_as_dict()
    print('report_dict')
    print(report_dict)
  return report
Esempio n. 14
0
def main(argv=None):
  """
  Run the attack / counter-attack experiment configured by CONFIG and FLAGS.

  Steps, in order:
    1. create the output directories under ``CONFIG.SAVE_PATH`` and the
       model directory ``CONFIG.SAVE_PATH + '../all/<dataset>/'``;
    2. seed the RNGs, create the session and load the data set named by
       ``CONFIG.DATASET`` ('mnist', 'cifar10', 'moon' or 'dims');
    3. build the model, restore it from its checkpoint when one exists,
       otherwise train and save it;
    4. evaluate on the clean data (``logits_0``);
    5. run a Carlini-Wagner L2 attack on the second half of the test set
       (``adv_img_1`` / ``logits_1``);
    6. run a second CW attack on the first half of the test set together
       with the once-attacked images (``adv_img_2`` / ``logits_2``);
    7. compute the L2 distances moved by the second attack and pass them
       to the plotting / threshold / AUROC helpers.
  Results of steps 4-6 are cached as ``.npy`` files and reloaded when
  present.

  :param argv: unused
  :return: an AccuracyReport object
  :raises ValueError: if ``CONFIG.DATASET`` is not a supported data set
  """

  from cleverhans_tutorials import check_installation
  check_installation(__file__)

  # Fail early with a clear message; otherwise an unknown data set only
  # surfaces later as undefined variables.
  supported_datasets = ('mnist', 'cifar10', 'moon', 'dims')
  if CONFIG.DATASET not in supported_datasets:
    raise ValueError('Unsupported CONFIG.DATASET %r; expected one of %s'
                     % (CONFIG.DATASET, ', '.join(supported_datasets)))

  save_path_data = CONFIG.SAVE_PATH + 'data/'
  model_path = CONFIG.SAVE_PATH + '../all/' +  CONFIG.DATASET + '/'
  # Check every directory on its own so that model_path + 'data/' is also
  # created when model_path already exists.
  for directory in (CONFIG.SAVE_PATH, save_path_data, model_path,
                    model_path + 'data/'):
    if not os.path.exists( directory ):
      os.makedirs( directory )

  nb_epochs = FLAGS.nb_epochs
  batch_size = FLAGS.batch_size
  learning_rate = FLAGS.learning_rate
  nb_filters = FLAGS.nb_filters
  # The test set is split in two halves of this size
  len_x = int(CONFIG.NUM_TEST/2)

  start = time.time()

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set seeds to improve reproducibility
  if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
    tf.set_random_seed(1234)
    np.random.seed(1234)
    rd.seed(1234)
  elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
    tf.set_random_seed(13)
    np.random.seed(1234)
    rd.seed(0)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
  tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2
  sess = tf.Session(config=tf_config)

  if CONFIG.DATASET == 'mnist':
    # Get MNIST data
    mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
  elif CONFIG.DATASET == 'cifar10':
    # Get CIFAR10 data
    data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')
  elif CONFIG.DATASET == 'moon':
    # Create a two moon example
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)
    x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                   y_train1, x_test1,
                                                                   y_test1)
  elif CONFIG.DATASET == 'dims':
    # Two moon data passed through add_noise_and_QR with CONFIG.NUM_DIMS
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)
    x_train2, y_train, x_test2, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                     y_train1,x_test1,
                                                                     y_test1)
    x_train, x_test = add_noise_and_QR(x_train2, x_test2, CONFIG.NUM_DIMS)

  np.save(os.path.join(save_path_data, 'x_test'), x_test)
  np.save(os.path.join(save_path_data, 'y_test'), y_test)

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': 1}
  rng = np.random.RandomState([2017, 8, 30])

  with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

    def do_eval(adv_x, preds, x_set, y_set, report_key):
      # Evaluate `preds` on (x_set, y_set), store the accuracy in the
      # report, append it to acc_param.txt and return the two arrays
      # produced by model_eval alongside the accuracy.
      acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x, nb_classes, x_set,
                                          y_set, args=eval_params)
      setattr(report, report_key, acc)
      if report_key:
        print('Accuracy on %s examples: %0.4f' % (report_key, acc), file=fi)
      return pred_np, adv_x_np

    if CONFIG.DATASET == 'mnist':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelBasicCNN('model1', nb_classes, nb_filters)
    elif CONFIG.DATASET == 'cifar10':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[32, 32, 3])
    elif CONFIG.DATASET == 'moon':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelMLP('model1', nb_classes)
    elif CONFIG.DATASET == 'dims':
      # the 'dims' model is stored per run, not in the shared model folder
      trained_model_path = save_path_data + 'trained_model'
      model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)

    def evaluate():
      _, _ = do_eval(x, preds, x_test, y_test, 'test during train')

    # Reuse a saved checkpoint when there is one, otherwise train and save
    if os.path.isfile( trained_model_path + '.index' ):
      tf_model_load(sess, trained_model_path)
    else:
      if CONFIG.DATASET == 'mnist':
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'cifar10':
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())
      elif CONFIG.DATASET == 'moon':
        train_2d(sess, loss, x, y, x_train, y_train, save=False, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'dims':
        train_2d(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      saver = tf.train.Saver()
      saver.save(sess, trained_model_path)

    # Evaluate the accuracy on test examples
    if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
      logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
    else:
      _, _ = do_eval(x, preds, x_train, y_train, 'train')
      logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
      np.save(os.path.join(save_path_data, 'logits_zero_attacked'), logits_0)

    if CONFIG.DATASET == 'moon':
      # Predictions on a regular grid over [0, 1] x [0, 1], cached on disk
      num_grid_points = 5000
      if os.path.isfile( model_path + 'data/images_mesh' + str(num_grid_points) + '.npy' ):
        x_mesh = np.load(model_path + 'data/images_mesh' + str(num_grid_points) + '.npy')
        logits_mesh = np.load(model_path + 'data/logits_mesh' + str(num_grid_points) + '.npy')
      else:
        xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points), np.linspace(0, 1, num_grid_points))
        x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
        y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
        x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(model_path, x_mesh1, y_mesh1)
        logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
        x_mesh = np.squeeze(x_mesh)
        np.save(os.path.join(model_path, 'data/images_mesh'+str(num_grid_points)), x_mesh)
        np.save(os.path.join(model_path, 'data/logits_mesh'+str(num_grid_points)), logits_mesh)

    # First half of the test set stays clean, second half gets attacked
    points_x = x_test[:len_x]
    points_y = y_test[:len_x]
    points_x_bar = x_test[len_x:]
    points_y_bar = y_test[len_x:]

    # Initialize the CW attack object and graph
    cw = CarliniWagnerL2(model, sess=sess)

    # first attack
    attack_params = {
        'learning_rate': CONFIG.CW_LEARNING_RATE,
        'max_iterations': CONFIG.CW_MAX_ITERATIONS
      }

    if CONFIG.DATASET == 'moon':

      out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
      attack_params['const_a_min'] = out_a
      attack_params['const_a_max'] = 100

    adv_x = cw.generate(x, **attack_params)

    if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
      adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
      logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
    else:
      #Evaluate the accuracy on adversarial examples
      preds_adv = model.get_logits(adv_x)
      logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar, points_y_bar,
                                    'test once attacked')
      np.save(os.path.join(save_path_data, 'images_once_attacked'), adv_img_1)
      np.save(os.path.join(save_path_data, 'logits_once_attacked'), logits_1)

    # counter attack
    attack_params['max_iterations'] = 1024

    if CONFIG.DATASET == 'moon':

      out_alpha2 = compute_epsilons_balls_alpha(x_mesh, np.squeeze(x_test),
                                                np.squeeze(adv_img_1), model_path,
                                                CONFIG.SAVE_PATH)
      attack_params['learning_rate'] = out_alpha2
      attack_params['const_a_min'] = -1
      attack_params['max_iterations'] = 2048

      plot_data(np.squeeze(adv_img_1), logits_1, CONFIG.SAVE_PATH+'data_pred1.png', x_mesh,
                logits_mesh)

    adv_adv_x = cw.generate(x, **attack_params)

    # Inputs of the counter attack: clean first half followed by the
    # once-attacked second half, labelled with the first-round outputs.
    x_k = np.concatenate((points_x, adv_img_1), axis=0)
    y_k = np.concatenate((points_y, logits_1), axis=0)

    if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
      adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
      logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
    else:
      # Evaluate the accuracy on adversarial examples
      preds_adv_adv = model.get_logits(adv_adv_x)
      logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                    'test twice attacked')

      np.save(os.path.join(save_path_data, 'images_twice_attacked'), adv_img_2)
      np.save(os.path.join(save_path_data, 'logits_twice_attacked'), logits_2)

    if CONFIG.DATASET == 'moon':
      plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
      plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
      test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0, logits_1, logits_2,
                 CONFIG.SAVE_PATH)

  compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:], logits_0[:len_x],
                    logits_2[:len_x], CONFIG.SAVE_PATH)

  # Sum over every non-batch axis; inputs with a single channel are
  # reduced over two axes only.
  if x_test.shape[-1] > 1:
    num_axis=(1,2,3)
  else:
    num_axis=(1,2)

  # L2 distance moved by the counter attack: clean points (D_p) and
  # once-attacked points (D_p_p).
  D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]), axis=num_axis)))
  D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1-adv_img_2[len_x:]),
                                    axis=num_axis)))
  D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1, logits_2[len_x:],
                                logits_0[:len_x], logits_2[:len_x])

  # len() is a reliable emptiness test for lists and arrays alike,
  # unlike a comparison with [].
  if len(D_p_mod) > 0 and len(D_p_p_mod) > 0:
    plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)

  plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)

  print('Time needed:', time.time()-start)

  return report
Esempio n. 15
0
def cifar10_tutorial(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    clean_train=CLEAN_TRAIN,
    testing=False,
    backprop_through_attack=BACKPROP_THROUGH_ATTACK,
    nb_filters=NB_FILTERS,
    num_threads=None,
    label_smoothing=0.1,
):
    """
    CIFAR10 cleverhans tutorial.

    Optionally trains a first model on clean data and measures it against
    FGSM, then always trains a second model with FGSM adversarial
    training and measures it on clean and adversarial inputs.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train each model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, train and evaluate a model on clean
                        examples before the adversarial training
    :param testing: if true, also fill the training-set entries of the
                    AccuracyReport (used by unit tests)
    :param backprop_through_attack: if False, gradients are stopped at the
                                    adversarial examples used for the
                                    adversarial evaluation
    :param nb_filters: forwarded to ModelAllConvolutional
    :param num_threads: if truthy, the session is created with
                        intra_op_parallelism_threads=1
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Accuracies are collected here and handed back to the caller
    accuracy_report = AccuracyReport()

    # Fixed graph-level seed and verbose logging
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Session, optionally restricted to one intra-op thread
    session_kwargs = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_kwargs))

    # CIFAR10 data plus an augmented, batched, prefetching input pipeline
    cifar = CIFAR10(train_start=train_start, train_end=train_end,
                    test_start=test_start, test_end=test_end)
    n_train_examples = cifar.x_train.shape[0]
    train_pipeline = cifar.to_tensorflow()[0]
    train_pipeline = train_pipeline.map(
        lambda image, label: (random_shift(random_horizontal_flip(image)),
                              label),
        4)
    train_pipeline = train_pipeline.batch(batch_size).prefetch(16)
    x_train, y_train = cifar.get_set("train")
    x_test, y_test = cifar.get_set("test")

    # Placeholders shaped after the test data
    height, width, channels = x_test.shape[1:4]
    num_classes = y_test.shape[1]
    x = tf.placeholder(tf.float32, shape=(None, height, width, channels))
    y = tf.placeholder(tf.float32, shape=(None, num_classes))

    # Settings shared by both training runs
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    eval_params = dict(batch_size=batch_size)
    fgsm_params = dict(eps=0.3, clip_min=0.0, clip_max=1.0)
    rng = np.random.RandomState([2017, 8, 30])

    def record_accuracy(logits, inputs, labels, report_key, is_adv=None):
        """Store the accuracy under report_key; print it unless is_adv is None."""
        acc = model_eval(sess, x, y, logits, inputs, labels, args=eval_params)
        setattr(accuracy_report, report_key, acc)
        if is_adv is None:
            return
        kind = "adversarial" if is_adv else "legitimate"
        print("Test accuracy on %s examples: %0.4f" % (kind, acc))

    def fit(loss_fn, eval_hook, net):
        """Train net's parameters on the augmented pipeline."""
        train(sess, loss_fn, None, None,
              dataset_train=train_pipeline,
              dataset_size=n_train_examples,
              evaluate=eval_hook,
              args=train_params,
              rng=rng,
              var_list=net.get_params())

    if clean_train:
        clean_model = ModelAllConvolutional(
            "model1", nb_classes=num_classes, nb_filters=nb_filters,
            input_shape=[32, 32, 3]) if False else ModelAllConvolutional(
                "model1", num_classes, nb_filters, input_shape=[32, 32, 3])
        clean_logits = clean_model.get_logits(x)
        clean_loss = CrossEntropy(clean_model, smoothing=label_smoothing)

        def evaluate_clean():
            record_accuracy(clean_logits, x_test, y_test,
                            "clean_train_clean_eval", False)

        fit(clean_loss, evaluate_clean, clean_model)

        # Training-set accuracy of the clean model
        if testing:
            record_accuracy(clean_logits, x_train, y_train,
                            "train_clean_train_clean_eval")

        # FGSM attack graph against the clean model
        clean_fgsm = FastGradientMethod(clean_model, sess=sess)
        clean_adv_x = clean_fgsm.generate(x, **fgsm_params)
        clean_adv_logits = clean_model.get_logits(clean_adv_x)

        # Accuracy of the clean model on adversarial test examples
        record_accuracy(clean_adv_logits, x_test, y_test,
                        "clean_train_adv_eval", True)

        # Same on the training set
        if testing:
            record_accuracy(clean_adv_logits, x_train, y_train,
                            "train_clean_train_adv_eval")

        print("Repeating the process, using adversarial training")

    # Second model, trained to be robust to FastGradientMethod
    robust_model = ModelAllConvolutional("model2", num_classes, nb_filters,
                                         input_shape=[32, 32, 3])
    robust_fgsm = FastGradientMethod(robust_model, sess=sess)

    def attack(inputs):
        return robust_fgsm.generate(inputs, **fgsm_params)

    robust_loss = CrossEntropy(robust_model, smoothing=label_smoothing,
                               attack=attack)
    robust_logits = robust_model.get_logits(x)
    robust_adv_x = attack(x)

    # With the flag off, no gradient flows back through the construction
    # of the adversarial examples. For FGSM the attack has zero gradient,
    # so the flag does not change the gradient; for other attacks turning
    # it on costs more but lets the defender anticipate how the attacker
    # reacts to updates of the defender's parameters.
    if not backprop_through_attack:
        robust_adv_x = tf.stop_gradient(robust_adv_x)
    robust_adv_logits = robust_model.get_logits(robust_adv_x)

    def evaluate_robust():
        # Adversarially trained model on legitimate test inputs ...
        record_accuracy(robust_logits, x_test, y_test,
                        "adv_train_clean_eval", False)
        # ... and on adversarial test inputs
        record_accuracy(robust_adv_logits, x_test, y_test,
                        "adv_train_adv_eval", True)

    # Adversarial training, evaluated through evaluate_robust
    fit(robust_loss, evaluate_robust, robust_model)

    # Training-set accuracies of the adversarially trained model
    if testing:
        record_accuracy(robust_logits, x_train, y_train,
                        "train_adv_train_clean_eval")
        record_accuracy(robust_adv_logits, x_train, y_train,
                        "train_adv_train_adv_eval")

    return accuracy_report
Esempio n. 16
0
def _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                          pred_adv_recon, adv_input_y, adv_target_y):
  """
  Print perturbation size, reconstruction distances and classifier agreement
  for one batch of adversarial examples.
  :param adv: adversarial inputs (same shape as adv_inputs)
  :param adv_inputs: clean source inputs the attack started from
  :param adv_input_targets: inputs the attack tried to move towards
  :param recon_adv: autoencoder reconstructions of adv
  :param pred_adv_recon: classifier logits computed on recon_adv
  :param adv_input_y: label rows of adv_inputs (argmax gives the class)
  :param adv_target_y: label rows of adv_input_targets
  """
  nb_samples = np.shape(adv_inputs)[0]
  # Square root of the per-example mean of the summed squared perturbation
  noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
  noise = pow(noise, 0.5)
  # Summed squared distance of the reconstruction to the source / the target
  d1 = np.sum(np.square(recon_adv - adv_inputs)) / nb_samples
  d2 = np.sum(np.square(recon_adv - adv_input_targets)) / nb_samples
  predicted = np.argmax(pred_adv_recon, axis=-1)
  nb_labels = np.shape(adv_target_y)[0]
  acc_1 = np.sum(predicted == np.argmax(adv_target_y, axis=-1)) / nb_labels
  acc_2 = np.sum(predicted == np.argmax(adv_input_y, axis=-1)) / nb_labels
  print("noise: ", noise)
  print("d1: ", d1)
  print("d2: ", d2)
  print("classifier acc_target: ", acc_1)
  print("classifier acc_true: ", acc_2)


def _fill_viz_grids(grid_recon, grid_input, recon_orig, recon_adv,
                    adv_inputs, adv, nb_classes):
  """
  Fill the two (nb_classes x nb_classes) image grids in place.
  The diagonal cell [c, c] gets the clean sample of class c (its
  reconstruction in grid_recon, the raw input in grid_input).  The cell
  [i, j] with i != j gets the example crafted from source class i towards
  target class j.
  :param grid_recon: grid receiving autoencoder reconstructions
  :param grid_input: grid receiving the images fed to the autoencoder
  :param recon_orig: reconstructions of adv_inputs
  :param recon_adv: reconstructions of adv
  :param adv_inputs: clean inputs, (nb_classes - 1) consecutive copies per
                     source class
  :param adv: adversarial inputs, ordered like adv_inputs
  :param nb_classes: number of classes (grid side length)
  """
  per_source = nb_classes - 1
  for j in range(nb_classes):
    for i in range(nb_classes):
      if i == j:
        grid_recon[i, j] = recon_orig[j * per_source]
        grid_input[i, j] = adv_inputs[j * per_source]
      else:
        # Each source skips itself in its target list, so targets past the
        # source class are shifted down by one position.
        pos = i * per_source + (j - 1 if j > i else j)
        grid_recon[i, j] = recon_adv[pos]
        grid_input[i, j] = adv[pos]


def _save_grid_figure(grid, filename):
  """
  Draw a 5-D image grid (cols, rows, height, width, channels) as a table of
  subplots, save it under `filename` and close the figure.
  :param grid: array indexed as grid[col, row, :, :, channel]
  :param filename: path handed to plt.savefig
  """
  plt.ioff()
  figure = plt.figure()
  figure.canvas.set_window_title('Cleverhans: Grid Visualization')

  num_cols = grid.shape[0]
  num_rows = grid.shape[1]
  num_channels = grid.shape[4]
  for yy in range(num_rows):
    for xx in range(num_cols):
      figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
      plt.axis('off')
      if num_channels == 1:
        plt.imshow(grid[xx, yy, :, :, 0])
      else:
        plt.imshow(grid[xx, yy, :, :, :])

  plt.savefig(filename)
  # Release the figure; otherwise every call keeps its figure alive.
  plt.close(figure)


def _reconstruct_and_classify(sess, x, model_ae, model_cl, clean_inputs,
                              adv_examples):
  """
  Reconstruct clean and adversarial inputs with the autoencoder and classify
  the adversarial reconstructions.
  NOTE(review): every call asks the wrappers for new output tensors, exactly
  as the original inline code did; presumably this adds graph nodes each
  time - confirm before sharing a single tensor instead.
  :param sess: TF session used to run the tensors
  :param x: input placeholder fed with the image batches
  :param model_ae: autoencoder wrapper exposing get_layer(x, 'RECON')
  :param model_cl: classifier wrapper exposing get_logits(x)
  :param clean_inputs: clean image batch
  :param adv_examples: adversarial image batch
  :return: (recon_orig, recon_adv, pred_adv_recon) as evaluated arrays
  """
  recon_orig = model_ae.get_layer(x, 'RECON')
  recon_orig = sess.run(recon_orig, feed_dict={x: clean_inputs})
  recon_adv = model_ae.get_layer(x, 'RECON')
  recon_adv = sess.run(recon_adv, feed_dict={x: adv_examples})
  pred_adv_recon = model_cl.get_logits(x)
  pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})
  return recon_orig, recon_adv, pred_adv_recon


def _build_targeted_pairs(x_test, y_test, nb_classes):
  """
  Build the (source, target) batches for the targeted attack from the first
  test example of every class.
  Every source class is paired with each of the other classes, in increasing
  class order, so the batches hold nb_classes * (nb_classes - 1) rows.
  :param x_test: test images
  :param y_test: test label rows (argmax gives the class)
  :param nb_classes: number of classes
  :return: (adv_inputs, adv_input_targets, adv_input_y, adv_target_y)
  """
  idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
          for i in range(nb_classes)]
  class_x = x_test[idxs]
  class_y = y_test[idxs]
  per_source = nb_classes - 1
  target_order = [tgt for src in range(nb_classes)
                  for tgt in range(nb_classes) if tgt != src]

  adv_inputs = np.repeat(class_x, per_source, axis=0).astype(np.float32)
  adv_input_y = np.repeat(class_y, per_source, axis=0)
  adv_input_targets = class_x[target_order]
  adv_target_y = class_y[target_order]
  return adv_inputs, adv_input_targets, adv_input_y, adv_target_y


def _sample_other_class_targets(x_train, y_train, rng):
  """
  For every training example draw a random training example of a different
  class and return those examples as the attack targets.
  Indices are redrawn from `rng` until the class differs, so this does not
  terminate if all examples share one class.
  :param x_train: training images
  :param y_train: training label rows (argmax gives the class)
  :param rng: np.random.RandomState used for the draws
  :return: array shaped like x_train holding one target per example
  """
  labels = np.argmax(y_train, axis=1)
  nb_train = len(x_train)
  chosen = np.empty(nb_train, dtype=np.int64)
  for ind in range(nb_train):
    r_ind = rng.randint(0, nb_train)
    while labels[r_ind] == labels[ind]:
      r_ind = rng.randint(0, nb_train)
    chosen[ind] = r_ind
  return x_train[chosen]


def mnist_ae(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   num_threads=None,
                   label_smoothing=0.1):
  """
  Train or restore a VAE and a CNN classifier on CIFAR10, attack the VAE with
  a targeted FGSM variant, then print metrics and save grid figures.
  Despite its name the function loads CIFAR10.  Besides its parameters it
  reads these names from the enclosing module scope: clean_train_vae,
  clean_train_cl, train_further, targeted, viz_enabled, adversarial_training,
  binarization, mean_filtering, train_dir_cl and filename.
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs for the initial VAE training
  :param batch_size: size of training batches
  :param learning_rate: learning rate stored in the training parameters
  :param clean_train: unused by this function
  :param testing: unused by this function
  :param backprop_through_attack: unused by this function
  :param num_threads: if truthy, the session runs with
                      intra_op_parallelism_threads=1
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: None; checkpoints and figures are written as side effects
  :raises ValueError: if `targeted` and `viz_enabled` are not both truthy,
                      because the attack batches are only built in that mode
  """
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  source_samples = 10

  set_log_level(logging.DEBUG)

  # Create the one TF session used for everything below
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))
  print("Created TensorFlow session.")

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  # NOTE(review): this input pipeline is never consumed below.  It is kept
  # because dropping graph ops presumably shifts the per-op seeds derived
  # from tf.set_random_seed - confirm before removing it.
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]
  print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

  # Define input TF placeholders: x is the model input, x_t the attack target
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  # Unused, kept for the same graph-order reason as dataset_train above.
  y_t = tf.placeholder( tf.float32, shape=(None, nb_classes))

  wrap_ae = ModelVAE('wrap_ae')
  recon = wrap_ae.get_layer(x,'RECON')
  print("Defined TensorFlow model graph.")

  def evaluate_ae():
    # Report the reconstruction distance of the first VAE on the train set
    eval_params = {'batch_size': 128}
    _, d1, _, _, _ = model_eval_ae(sess, x, x_t,recon, x_train, x_train, args=eval_params)
    print("reconstruction distance: ", d1)

  # Train the VAE
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
  }
  rng = np.random.RandomState([2017, 8, 30])

  if clean_train_vae==True:
    print("Training VAE")
    loss = vae_loss(wrap_ae)

    train_ae(sess, loss, x_train, x_train, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae,
                args=train_params, rng=rng, var_list = wrap_ae.get_params())

    # The Saver is created after training so it also sees the variables
    # that exist only once training has run.
    saver = tf.train.Saver()
    saver.save(sess, "train_dir/model_vae_fgsm.ckpt")
    print("saved model")

  else:
    print("Loading VAE")
    saver = tf.train.Saver()
    saver.restore(sess, "train_dir/model_vae.ckpt")
    evaluate_ae()
    if(train_further):
      train_params = {
          'nb_epochs': 10,
          'batch_size': batch_size,
          'learning_rate': 0.0002,
      }
      # Training with the saved model as starting point
      loss = SquaredError(wrap_ae)
      train_ae(sess, loss, x_train, x_train, optimizer = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae,
            args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, "train_dir/model_vae_fgsm.ckpt")

      evaluate_ae()

      print("Model loaded and trained for more epochs")

  cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds_cl = cl_model(x)

  def evaluate():
    # Evaluate the accuracy of the classifier on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_cl,x_t, x_test, y_test, x_test,args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Re-seeding here restarts the random stream for the classifier stage.
  rng = np.random.RandomState([2017, 8, 30])

  wrap_cl = KerasModelWrapper(cl_model)
  cl_ckpt_path = "train_dir/model_cnn_cl.ckpt"

  if clean_train_cl == True:
    train_params = {
        'nb_epochs': 5,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    }
    print("Training CNN Classifier")
    loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
    train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6),
          args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, cl_ckpt_path)
    # Report the path that was actually written.
    print("saved model at ", cl_ckpt_path)

  else:
    print("Loading CNN Classifier")
    saver = tf.train.Saver()
    saver.restore(sess, cl_ckpt_path)
    evaluate()
    if(train_further):
      train_params = {
          'nb_epochs': 10,
          'batch_size': batch_size,
          'learning_rate': learning_rate,
          'train_dir': train_dir_cl,
          'filename': filename
      }
      loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
      train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6),
            args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, "train_dir/model_cl_fgsm.ckpt")
      print("Model loaded and trained further")
      evaluate()

  ###########################################################################
  # Craft adversarial examples against the VAE with the FGSM variant
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # The source/target batches only exist in targeted + visualization mode;
  # fail with a clear message instead of a NameError further down.
  if not (targeted and viz_enabled):
    raise ValueError("mnist_ae requires both `targeted` and `viz_enabled` "
                     "to be enabled: the attack batches are built from the "
                     "per-class visualization samples")

  assert source_samples == nb_classes
  adv_inputs, adv_input_targets, adv_input_y, adv_target_y = \
      _build_targeted_pairs(x_test, y_test, nb_classes)

  # Arrays for grid visualization: reconstructions and the inputs fed in
  grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                nchannels)
  grid_viz_data = np.zeros(grid_shape, dtype='f')
  grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }

  fgsm = FastGradientMethodAe(wrap_ae, sess=sess)
  adv = fgsm.generate(x,x_t, **fgsm_params)
  adv = sess.run(adv, {x: adv_inputs, x_t: adv_input_targets})

  recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
      sess, x, wrap_ae, wrap_cl, adv_inputs, adv)
  _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                        pred_adv_recon, adv_input_y, adv_target_y)
  _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig, recon_adv,
                  adv_inputs, adv, nb_classes)

  print('--------------------------------------')

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  _save_grid_figure(grid_viz_data, 'cifar10_fgsm_vae_fig1')
  _save_grid_figure(grid_viz_data_1, 'cifar10_fgsm_vae_fig2')

  if adversarial_training:

    print("starting adversarial training")
    print("len of x_train_target and x_train: ", len(x_train), len(x_train))
    x_train_target = _sample_other_class_targets(x_train, y_train, rng)

    wrap_ae2 = ModelVAE('wrap_ae2')
    fgsm2 = FastGradientMethodAe(wrap_ae2, sess=sess)
    # The training-time adversarial set is crafted against the first VAE
    # (fgsm), not against the new model.
    adv2 = fgsm.generate(x,x_t, **fgsm_params)

    adv_set = sess.run(adv2, {x: x_train, x_t: x_train_target})
    # Clean and adversarial inputs both aim at the clean image.
    x_train_aim = np.append(x_train, x_train, axis = 0)
    x_train_app = np.append(x_train, adv_set, axis = 0)
    loss2 =  vae_loss(wrap_ae2)
    train_params = {
        'nb_epochs': 5,
        'batch_size': batch_size,
        'learning_rate': learning_rate}

    train_ae(sess, loss2, x_train_app,  x_train_aim, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5),
                args=train_params, rng=rng, var_list = wrap_ae2.get_params())

    # NOTE(review): evaluate_ae reports on the first VAE (wrap_ae), not on
    # the freshly trained wrap_ae2 - confirm this is intended.
    evaluate_ae()

    adv3 = fgsm2.generate(x, x_t, **fgsm_params)
    adv3 = sess.run(adv3, {x: adv_inputs, x_t: adv_input_targets})
    recon_orig2, recon_adv2, pred_adv_recon2 = _reconstruct_and_classify(
        sess, x, wrap_ae2, wrap_cl, adv_inputs, adv3)
    _print_attack_metrics(adv3, adv_inputs, adv_input_targets, recon_adv2,
                          pred_adv_recon2, adv_input_y, adv_target_y)
    _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig2, recon_adv2,
                    adv_inputs, adv3, nb_classes)

    print('--------------------------------------')

    # Average distortion of the examples crafted against the retrained VAE
    percent_perturbed = np.mean(np.sum((adv3 - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    _save_grid_figure(grid_viz_data, 'cifar10_vae_fgsm_adv_fig1')
    _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fgsm_adv_fig2')

  if binarization:

    print("----------------")
    print("BINARIZATION")

    # Threshold the adversarial images in place at 0.5
    adv[adv>0.5] = 1.0
    adv[adv<=0.5] = 0.0

    recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
        sess, x, wrap_ae, wrap_cl, adv_inputs, adv)
    _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                          pred_adv_recon, adv_input_y, adv_target_y)
    _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig, recon_adv,
                    adv_inputs, adv, nb_classes)

    _save_grid_figure(grid_viz_data, 'cifar10_fgsm_vae_fig1_bin')
    _save_grid_figure(grid_viz_data_1, 'cifar10_fgsm_vae_fig2_bin')

  if(mean_filtering ==True):

    print("----------------")
    print("MEAN FILTERING")

    # NOTE(review): a single size is passed for the whole 4-D batch; if this
    # is scipy.ndimage.uniform_filter the window also spans the sample and
    # channel axes - confirm that is intended.
    adv = uniform_filter(adv, 2)

    recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
        sess, x, wrap_ae, wrap_cl, adv_inputs, adv)
    _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                          pred_adv_recon, adv_input_y, adv_target_y)
    _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig, recon_adv,
                    adv_inputs, adv, nb_classes)

    _save_grid_figure(grid_viz_data, 'cifar10_fgsm_vae_fig1_mean')
    _save_grid_figure(grid_viz_data_1, 'cifar10_fgsm_vae_fig2_mean')
def generate_CIFAR10_adv(attacker_name, train_start=0, train_end=60000, test_start=0,
                         test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                         learning_rate=LEARNING_RATE,
                         clean_train=CLEAN_TRAIN,
                         testing=False,
                         nb_filters=NB_FILTERS, num_threads=None,
                         label_smoothing=0.1, args=FLAGS):
    """
    Train (or restore) a CIFAR10 classifier and attack it with one attacker.
    The attack success rate is measured on the train and test sets and stored
    on the returned report.  When `clean_train` is falsy nothing is trained
    or attacked and an empty report is returned.
    :param attacker_name: key into ATTACKERS (attack class) and ATTACK_PARAM
                          (its keyword arguments); a "batch_size" entry in
                          ATTACK_PARAM overrides `batch_size` and is also
                          written to the module-level BATCH_SIZE
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: train/restore the clean model and run the attack
    :param testing: if true, also evaluate clean test accuracy
    :param nb_filters: number of filters passed to ModelAllConvolutional
    :param num_threads: if truthy, the session runs with
                        intra_op_parallelism_threads=1
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param args: object whose `resume` attribute is the checkpoint directory
    :return: an AccuracyReport object
    """
    global BATCH_SIZE

    attack_config = ATTACK_PARAM[attacker_name]
    if "batch_size" in attack_config:
        batch_size = attack_config["batch_size"]
        BATCH_SIZE = batch_size

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    config_args = {}
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    config_args["gpu_options"] = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholders with a fixed batch dimension.  They use
    # the effective batch_size so they always agree with the batch size
    # handed to training and evaluation below.
    x = tf.placeholder(tf.float32, shape=(batch_size, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}

    rng = np.random.RandomState([2017, 8, 30])

    def describe(is_adv):
        # Map the is_adv flag to the word used in log lines (None = silent)
        if is_adv is None:
            return None
        return 'adversarial' if is_adv else 'legitimate'

    def do_generate_eval(adv_x, pred_adv_x, x_set, y_set, report_key, is_adv=None):
        # Run the attack over a whole set and record its success rate
        outcome = untargeted_advx_image_eval(sess, x, y, adv_x, pred_adv_x,
                                             x_set, y_set, args=eval_params)
        adv_images_total, adv_pred_total, gt_label_total, success_rate = outcome

        setattr(report, report_key, success_rate)
        report_text = describe(is_adv)
        if report_text:
            print('adversarial attack successful rate on %s: %0.4f' % (report_text, success_rate))
        return adv_images_total, adv_pred_total, gt_label_total, success_rate  # shape = (total, H,W,C)

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Record (and optionally print) the accuracy of preds on a set
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        report_text = describe(is_adv)
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if not clean_train:
        return report

    model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)  # tf.tensor

    def evaluate():
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    resume_files = os.listdir(args.resume)
    loss = CrossEntropy(model, smoothing=label_smoothing)
    if not resume_files:
        # Nothing to resume from: train from scratch and save a checkpoint.
        # The Saver is created before training, as in the original flow.
        saver = tf.train.Saver()
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())  # train for nb_epochs epochs
        save_path = saver.save(sess, "{}/model".format(args.resume), global_step=nb_epochs)
        print("Model saved in path: %s" % save_path)
    else:
        # Resume from the latest checkpoint in the directory
        latest_checkpoint = tf.train.latest_checkpoint(args.resume)
        saver = tf.train.Saver()
        saver.restore(sess, latest_checkpoint)

    # Evaluate the clean model on the test set
    if testing:
        evaluate()

    # Initialize the attack object and its graph
    attacker = ATTACKERS[attacker_name](model, sess=sess)
    print("begin generate adversarial examples of CIFAR-10 using attacker: {}".format(attacker_name))
    adv_x = attacker.generate(x, **attack_config)  # tensor
    preds_adv = model.get_logits(adv_x)

    # Generate adversarial examples for both splits; only the success rate
    # of each run is used afterwards.
    _, _, _, success_rate = do_generate_eval(adv_x, preds_adv, x_train,
                                             y_train,
                                             "clean_train_adv_eval", True)
    print("attacker: {} attack successful rate for CIFAR-10 train dataset is {}".format(attacker_name, success_rate))
    _, _, _, success_rate = do_generate_eval(adv_x, preds_adv, x_test,
                                             y_test, "clean_test_adv_eval",
                                             True)
    print("attacker: {} attack successful rate for CIFAR-10 test dataset is {}".format(attacker_name, success_rate))

    return report
Esempio n. 18
0
def cifar10_tutorial(train_start=0,
                     train_end=50000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """
    CIFAR10 cleverhans tutorial with an anisotropic-diffusion check.

    Optionally trains a ``ModelAllConvolutional`` on clean CIFAR10 data,
    builds FGSM adversarial examples for the test set, evaluates the model on
    them, then passes every adversarial test image through
    ``filter.anisotropic_diffusion`` and prints the accuracy on the filtered
    images.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param clean_train: if True, train the model on clean examples. No
                        adversarial training is performed by this function.
    :param testing: if True, load the weights stored at
                    ``./checkpoint_dir/clean_model_100.ckpt`` (replacing
                    whatever the session currently holds) and evaluate clean
                    test accuracy. The checkpoint must already exist; this
                    function does not write it.
    :param backprop_through_attack: unused in this function; kept so the
                                    signature stays compatible with callers.
    :param nb_filters: forwarded to ``ModelAllConvolutional`` (presumably the
                       base number of convolution filters -- confirm against
                       the model definition).
    :param num_threads: if truthy, the session is limited to a single
                        intra-op thread. The value itself is not used as the
                        thread count.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object with ``clean_train_clean_eval`` (when
             it was evaluated) and ``clean_train_adv_eval`` set. The accuracy
             after anisotropic diffusion is only printed.
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]

    # Training pipeline: augment each example, then batch and prefetch.
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.13, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds` on (x_set, y_set) and store it on the report.

        `is_adv` only selects the wording of the printed line; passing None
        suppresses the print.
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)

    if clean_train:
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

    # NOTE(review): with clean_train=False and testing=False nothing visible
    # here trains or loads the model variables -- confirm callers never use
    # that combination.

    # Load the stored model and compute clean testing accuracy
    if testing:
        tf_model_load(sess, file_path="./checkpoint_dir/clean_model_100.ckpt")
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)

    # Evaluate the accuracy of the CIFAR10 model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

    # Materialise the adversarial test images chunk by chunk. The chunk
    # bounds follow the actual test-set size, so a subset selected through
    # test_start/test_end is handled without evaluating empty batches.
    x_test_adv = np.zeros(shape=x_test.shape)
    adv_chunk_size = 1000
    nb_test = x_test.shape[0]
    for start in range(0, nb_test, adv_chunk_size):
        stop = min(start + adv_chunk_size, nb_test)
        x_test_adv[start:stop] = adv_x.eval(
            session=sess, feed_dict={x: x_test[start:stop]})

    # Apply anisotropic diffusion to every adversarial sample
    x_test_filtered = np.zeros(shape=x_test_adv.shape)
    for idx, adv_image in enumerate(x_test_adv):
        x_test_filtered[idx] = filter.anisotropic_diffusion(adv_image)

    # Accuracy of the unmodified model on the filtered adversarial images
    acc = model_eval(sess,
                     x,
                     y,
                     preds,
                     x_test_filtered,
                     y_test,
                     args=eval_params)
    print("acc after anisotropic diffusion is {}".format(acc))

    return report