Example #1
        z1[i] = tf.matmul(h1[i - 1], w[i]) + b[i]
        h1[i] = tf.nn.relu(z1[i])
    if i == n - 1:
        z1[i] = tf.matmul(h1[i - 1], w[i]) + b[i]
        h1[i] = z1[i]

y = classifier(x)
yn = classifier_n(x)
loss_cls = softmax_loss(label, yn)
all_vars = tf.trainable_variables()
c_vars = [var for var in all_vars if 'classifier' in var.name]
train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss_cls, var_list = c_vars, global_step = global_step)

jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)
x_fgsm = attack.fgsm(x, y, eps=0.2, clip_min=0, clip_max=1)


def main():
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        acc = {}
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1]}
            sess.run(train_op_classifier, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
                acc['benign'] = sess.run(get_acc(x, label),
Example #2
def model_train(para):
    """Build and train a noisy MLP on MNIST, reporting adversarial robustness.

    A fully-connected network (layer widths from the module-level ``layers``
    list) is trained with Gaussian noise of stddev ``para['std'][i]`` injected
    into every layer's pre-activation.  A noise-free copy sharing the same
    weights is used for evaluation and for generating FGSM adversarial
    examples.

    Args:
        para: dict with key 'std' -> per-layer noise standard deviations.

    NOTE(review): relies on module-level globals defined elsewhere in this
    file (layers, random_seed, learning_rate, steps, batch_size, sample_rate,
    weight_variable, bias_variable, attack, input_data) — confirm before reuse.
    """
    sess = tf.Session()
    # Graph-level seed; set before any random ops are created below.
    tf.set_random_seed(random_seed)
    n = len(layers)
    x = tf.placeholder(tf.float32, [None, 784])  # input (flattened 28x28)
    label = tf.placeholder(tf.float32, [None, 10])  # one-hot true label
    std = para['std']  # per-layer noise stddevs

    w, b = [0 for i in range(n)], [0 for i in range(n)]
    for i in range(1, n):
        w[i] = weight_variable([layers[i - 1], layers[i]])
        b[i] = bias_variable([layers[i]])

    # Model WITH noise: Gaussian noise added at every layer's pre-activation.
    z, h = [0 for i in range(n)], [0 for i in range(n)]
    for i in range(n):
        if i == 0:
            z[i] = x
            z[i] += tf.random_normal(shape=tf.shape(z[i]),
                                     mean=0.0,
                                     stddev=std[i],
                                     dtype=tf.float32)
            z[i] = tf.clip_by_value(z[i], 0, 1)  # keep noisy pixels in [0, 1]
            h[i] = z[i]
        if i > 0 and i < n - 1:
            z[i] = tf.matmul(h[i - 1], w[i]) + b[i]
            z[i] += tf.random_normal(shape=tf.shape(z[i]),
                                     mean=0.0,
                                     stddev=std[i],
                                     dtype=tf.float32)
            h[i] = tf.nn.relu(z[i])
        if i == n - 1:
            z[i] = tf.matmul(h[i - 1], w[i]) + b[i]
            z[i] += tf.random_normal(shape=tf.shape(z[i]),
                                     mean=0.0,
                                     stddev=std[i],
                                     dtype=tf.float32)
            h[i] = z[i]  # logits: no activation at the output layer
    y = h[n - 1]

    # Total squared weight norm (computed but not added to the loss).
    w_sum = tf.constant(0, dtype='float32')
    for i in range(1, n):
        w_sum += tf.reduce_sum(tf.square(w[i]))

    # Gradient descent on the noisy model's cross-entropy.  Gradients are
    # taken explicitly (equivalent to opt.minimize, kept explicit so they can
    # be inspected/modified).
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=y))
    gw, gb = [0 for i in range(n)], [0 for i in range(n)]
    for i in range(1, n):
        gw[i] = tf.gradients(loss, w[i])[0]
        gb[i] = tf.gradients(loss, b[i])[0]
    opt = GradientDescentOptimizer(learning_rate=learning_rate)
    gradients = []
    for i in range(1, n):
        gradients.append((gw[i], w[i]))
        gradients.append((gb[i], b[i]))
    train_step = opt.apply_gradients(gradients)

    # Model WITHOUT noise — shares w/b; used for evaluation and attacks.
    z2, h2 = [0 for i in range(n)], [0 for i in range(n)]
    for i in range(n):
        if i == 0:
            z2[i] = x
            h2[i] = z2[i]
        if i > 0 and i < n - 1:
            z2[i] = tf.matmul(h2[i - 1], w[i]) + b[i]
            h2[i] = tf.nn.relu(z2[i])
        if i == n - 1:
            z2[i] = tf.matmul(h2[i - 1], w[i]) + b[i]
            h2[i] = z2[i]
    y2 = h2[n - 1]

    # Attack: FGSM against the clean (noise-free) model.
    x_adv = attack.fgsm(x, y2, eps=0.3, clip_min=0, clip_max=1)

    # Evaluation: accuracy of the clean model.
    acc = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y2, 1), tf.argmax(label, 1)), tf.float32))

    # Mean logit norm of the clean model.  Built ONCE here — the original
    # re-created this op inside the training loop on every report step,
    # leaking graph nodes.
    check = tf.reduce_mean(tf.norm(y2, axis=1))

    # Data: MNIST plus pre-computed FGSM adversarial examples from disk.
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    x_adv_mnist_fsgm = np.load(os.path.join('data', 'x_adv_mnist_fsgm.npy'))

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: batch[0], label: batch[1]})
            if t % int(1 / sample_rate) == 0:  # periodic evaluation
                epoch = int(t / int(1 / sample_rate))

                # Fresh FGSM samples against the current weights.
                x_adv_sample = sess.run(x_adv,
                                        feed_dict={
                                            x: mnist.test.images,
                                            label: mnist.test.labels
                                        })
                acc_benign = sess.run(acc,
                                      feed_dict={
                                          x: mnist.test.images,
                                          label: mnist.test.labels
                                      })
                acc_adv = sess.run(acc,
                                   feed_dict={
                                       x: x_adv_sample,
                                       label: mnist.test.labels
                                   })
                acc_pre_adv = sess.run(acc,
                                       feed_dict={
                                           x: x_adv_mnist_fsgm,
                                           label: mnist.test.labels
                                       })
                print(epoch, acc_benign, acc_adv, acc_pre_adv)
                print(
                    sess.run([check],
                             feed_dict={
                                 x: mnist.test.images,
                                 label: mnist.test.labels
                             }))
    sess.close()  # release session resources (the original leaked the session)
Example #3
def main():
    """Jointly train an MNIST classifier and a 'transformer' network that
    learns to perturb inputs adversarially (the transformer maximizes the
    classifier's loss), printing accuracies once per epoch.

    NOTE(review): relies on helpers defined elsewhere in this file
    (transformer, classifier, classifier_x, sample_Z, attack, input_data,
    SaliencyMapMethod, GradientDescentOptimizer).
    """
    sess = tf.Session()
    train_size, test_size = 55000, 10000
    batch_size = 100
    lr = 0.05
    epochs = 100
    steps = epochs * int(train_size / batch_size)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    x = tf.placeholder(tf.float32, [None, 784])  # input for real images
    x_adv = tf.placeholder(tf.float32, [None, 784])  # pre-computed adversarial inputs (loaded from disk)
    y_ = tf.placeholder(tf.float32, [None, 10])  # groundtruth class label
    y_target = tf.placeholder(tf.float32, [None, 10])  # target label for the targeted loss
    noise = tf.placeholder(tf.float32, [None, 100])  # latent noise for the transformer

    tx = transformer(x, noise, 3)  # transformed (perturbed) images
    # NOTE(review): classifier_x appears to return (logits, feature-norm) — confirm.
    yx_2, z_norm = classifier_x(x)
    yx = classifier(x)
    ytx = classifier(tx)
    ytx_2, z_norm2 = classifier_x(tx)
    y_x_adv = classifier(x_adv)

    x_fgsm = attack.fgsm(x, yx_2, eps=0.1, clip_min=0, clip_max=1)
    y_x_fgsm = classifier(x_fgsm)

    # NOTE(review): jsma and jsma_params are constructed but never used in
    # this function.
    jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)
    one_hot_target = np.zeros((100, 10), dtype=np.float32)
    one_hot_target[:, 1] = 1
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'nb_classes': 10,
        'clip_min': 0.,
        'clip_max': 1.,
        'targets': yx,
        'y_val': one_hot_target
    }

    # Mean L2 perturbation magnitudes (defined but never fetched below).
    perturb = {}
    perturb['tx'] = tf.reduce_mean(tf.norm(tx - x, axis=1))
    perturb['fgsm'] = tf.reduce_mean(tf.norm(x_fgsm - x, axis=1))

    loss = {}
    # Cross-entropy of clean / transformed inputs under classifier_x.
    loss['cx'] = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=yx_2))
    loss['ctx'] = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=ytx_2))

    # Targeted loss: push transformed inputs toward y_target (unused below).
    loss['cttx'] = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_target, logits=ytx))

    loss['classifier'] = loss['cx']  #+ loss['ctx']
    #loss['transformer'] = loss['cttx']
    # The transformer maximizes the classifier's loss on transformed inputs.
    loss['transformer'] = -loss['ctx']

    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    t_vars = [var for var in all_vars if 'transformer' in var.name]

    train_op = {}
    train_op['classifier'] = GradientDescentOptimizer(learning_rate = lr) \
        .minimize(loss['classifier'], var_list = c_vars, global_step = global_step)
    train_op['transformer'] = GradientDescentOptimizer(learning_rate = lr) \
        .minimize(loss['transformer'], var_list = t_vars, global_step = global_step)

    # Accuracies on clean, transformed, fgsm, and pre-computed adversarial inputs.
    acc = {}
    acc['x'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(yx, 1), tf.argmax(y_, 1)), tf.float32))
    acc['tx'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(ytx, 1), tf.argmax(y_, 1)), tf.float32))
    acc['x_fgsm'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y_x_fgsm, 1), tf.argmax(y_, 1)),
                tf.float32))
    acc['x_adv'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y_x_adv, 1), tf.argmax(y_, 1)), tf.float32))

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    x_adv_mnist_fsgm = np.load(os.path.join('data', 'x_fgsm_mnist.npy'))
    # NOTE(review): set AFTER graph construction — in TF1 this does not affect
    # the seeds already captured by the random ops created above.
    tf.set_random_seed(1024)

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            y_tar = [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0] for i in range(batch_size)]  # target class 0
            y_tar = np.array(y_tar, dtype='float32')

            noise_sample = sample_Z(batch_size, 100)

            # Alternate classifier / transformer updates on the same batch.
            sess.run(train_op['classifier'],
                     feed_dict={
                         x: batch[0],
                         y_: batch[1],
                         noise: noise_sample,
                         y_target: y_tar
                     })
            sess.run(train_op['transformer'],
                     feed_dict={
                         x: batch[0],
                         y_: batch[1],
                         noise: noise_sample,
                         y_target: y_tar
                     })

            if t % int(train_size / batch_size) == 0:  # once per epoch
                epoch = int(t / int(train_size / batch_size))

                noise_sample2 = sample_Z(10000, 100)
                test_batch = mnist.test.next_batch(10000)
                print(test_batch[0].shape)
                # Fetch the whole acc dict plus the clean feature norm.
                # NOTE(review): y_tar has batch_size rows here while the test
                # fetch is 10000 — harmless, since y_target is not consumed by
                # the fetched ops.
                var_list = [acc, z_norm]
                res = sess.run(var_list, feed_dict = {x: test_batch[0], y_: test_batch[1], noise : noise_sample2, \
                    x_adv: x_adv_mnist_fsgm, y_target: y_tar})
                print(epoch)
                for r in res:
                    print(r)
def main():
    """Train an MNIST classifier plus a generator ('gan') that learns to
    perturb inputs adversarially, then generate FGSM / JSMA / generator
    adversarial test sets, save them, and compare perturbation sizes and
    accuracies.

    NOTE(review): relies on helpers defined elsewhere in this file
    (classifier, generator, softmax_loss, get_acc, diff, sample_Z, attack,
    save_images, input_data, SaliencyMapMethod, GradientDescentOptimizer).
    """
    random_seed = 1024
    train_size, test_size = 55000, 10000
    batch_size = 100
    learning_rate = 0.05
    epochs = 20
    steps = epochs * 550  # 550 batches of 100 per epoch over 55k train images
    sess = tf.Session()
    global_step = tf.Variable(0, name = 'global_step', trainable = False)
    tf.set_random_seed(random_seed)

    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label
    noise = tf.placeholder(tf.float32, [None, 100]) # noise vector
    # NOTE(review): y_target is fed below but never consumed by the graph.
    y_target = tf.placeholder(tf.float32, [None, 10]) # target label

    # Placeholders for the pixel-space diff op.
    x1 = tf.placeholder(tf.float32, [None, 784])
    x2 = tf.placeholder(tf.float32, [None, 784])

    y = classifier(x)

    # gan: the generator perturbs x; its loss is the negated classifier loss,
    # so it learns perturbations that fool the classifier.
    x_gan = generator(x, noise, 4)
    y_gan = classifier(x_gan)

    loss = softmax_loss(label, y)
    loss_gan = - softmax_loss(label, y_gan)

    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    g_vars = [var for var in all_vars if 'generator' in var.name]
    train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss, var_list = c_vars, global_step = global_step)
    train_op_generator = GradientDescentOptimizer(learning_rate = 0.05) \
        .minimize(loss_gan, var_list = g_vars, global_step = global_step)

    #fgsm
    x_fgsm = attack.fgsm(x, y, eps = 0.2, clip_min=0, clip_max=1)
    y_fgsm = classifier(x_fgsm)
    # jsma
    jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)

    # Evaluation ops, built ONCE.  (Bug fix: the original called
    # get_acc(x, label) / diff(x1, x2) on every evaluation, adding new nodes
    # to the graph each time.)
    acc_op = get_acc(x, label)
    diff_op = diff(x1, x2)

    # Bug fix: the Saver must be constructed AFTER the model variables exist;
    # the original created it before classifier/generator were built, so a
    # checkpoint would only have contained global_step.
    saver = tf.train.Saver()

    # train
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Targeted labels: class 0 for every example.
    y_target_batch = np.zeros((100, 10), dtype=np.float32)
    y_target_batch[:, 0] = 1.0
    y_target_test = np.zeros((10000, 10), dtype=np.float32)
    y_target_test[:, 0] = 1.0

    sess.run(tf.global_variables_initializer())
    with sess.as_default():

        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            sess.run(train_op_classifier, feed_dict={x: batch[0], label: batch[1]})
            if t % 550 == 0:
                epoch = int(t / 550)
                acc_benign = sess.run(acc_op, feed_dict={x: mnist.test.images, label: mnist.test.labels})
                print(epoch, acc_benign)

        print('train gan')
        for t in range(1, 550 * 5 + 1):  # 5 epochs for the generator
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1], noise: sample_Z(batch_size, 100), y_target: y_target_batch}
            sess.run(train_op_generator, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
                f_dict = {x: mnist.test.images, label: mnist.test.labels, noise: sample_Z(10000, 100), y_target: y_target_batch}
                x_gan_d = sess.run(x_gan, feed_dict=f_dict)
                f_dict = {x: x_gan_d, label: mnist.test.labels}
                acc_gan = sess.run(acc_op, feed_dict=f_dict)
                print(epoch, acc_gan)

        checkpoint_path = os.path.join('model', 'basic_model.ckpt')
        #saver.save(sess, checkpoint_path, global_step = 1)

        print('generate adv samples for the first batch of the testing set')
        # real
        x_real_mnist_1 = mnist.test.images[0:100,]
        np.save(os.path.join('data','x_real_mnist_1.npy'), x_real_mnist_1)
        x_real_mnist_1_r = x_real_mnist_1.reshape([100, 28, 28])
        save_images(x_real_mnist_1_r, [10, 10], os.path.join('img', 'x_real_mnist_1.png'))
        # fgsm
        x_fgsm_mnist_1 = sess.run(x_fgsm, feed_dict = {x: mnist.test.images[0:100,], label: mnist.test.labels[0:100,]})
        np.save(os.path.join('data','x_fgsm_mnist_1.npy'), x_fgsm_mnist_1)
        x_fgsm_mnist_1_r = x_fgsm_mnist_1.reshape([100, 28, 28])
        save_images(x_fgsm_mnist_1_r, [10, 10], os.path.join('img', 'x_fgsm_mnist_1.png'))
        #jsma
        jsma_params = {'theta': 1., 'gamma': 0.1,'nb_classes': 10, 'clip_min': 0.,'clip_max': 1., 'targets': y,\
            'y_val': y_target_batch}
        x_jsma_mnist_1 = jsma.generate_np(mnist.test.images[0:100,], **jsma_params)
        np.save(os.path.join('data','x_jsma_mnist_1.npy'), x_jsma_mnist_1)
        acc_jsma_1 = sess.run(acc_op, feed_dict={x: x_jsma_mnist_1, label: mnist.test.labels[0:100,]})
        x_jsma_mnist_1_r = x_jsma_mnist_1.reshape([100, 28, 28])
        save_images(x_jsma_mnist_1_r, [10, 10], os.path.join('img', 'x_jsma_mnist_1.png'))
        # gan
        x_gan_mnist_1 = sess.run(x_gan, feed_dict={x: mnist.test.images[0:100,] ,label: mnist.test.labels[0:100,]\
            , noise: sample_Z(batch_size, 100), y_target: y_target_batch})
        np.save(os.path.join('data','x_gan_mnist_1.npy'), x_gan_mnist_1)
        x_gan_mnist_1_r = x_gan_mnist_1.reshape([100, 28, 28])
        save_images(x_gan_mnist_1_r, [10, 10], os.path.join('img', 'x_gan_mnist_1.png'))

        # Mean pixel-space distance of each attack from the clean images.
        diff_fgsm = sess.run(diff_op, feed_dict={x1: x_real_mnist_1, x2: x_fgsm_mnist_1})
        diff_jsma = sess.run(diff_op, feed_dict={x1: x_real_mnist_1, x2: x_jsma_mnist_1})
        diff_gan = sess.run(diff_op, feed_dict={x1: x_real_mnist_1, x2: x_gan_mnist_1})
        print('perturb: fgsm: {:.3f}, jsma: {:.3f}, gan: {:.3f}'.format(diff_fgsm, diff_jsma, diff_gan))

        acc_benign = sess.run(acc_op, feed_dict={x: mnist.test.images, label: mnist.test.labels})

        print('generate adv samples for the entire testing set')
        # fgsm
        x_fgsm_mnist = sess.run(x_fgsm, feed_dict = {x: mnist.test.images, label: mnist.test.labels})
        np.save(os.path.join('data','x_fgsm_mnist.npy'), x_fgsm_mnist)
        acc_fgsm = sess.run(acc_op, feed_dict={x: x_fgsm_mnist, label: mnist.test.labels})

        # gan
        x_gan_mnist = sess.run(x_gan, feed_dict={x: mnist.test.images ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})
        np.save(os.path.join('data','x_gan_mnist.npy'), x_gan_mnist)
        acc_gan = sess.run(acc_op, feed_dict={x: x_gan_mnist ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})

        print('accuracy: benign: {:.3f}, fgsm: {:.3f}, jsma: {:.3f}, gan: {:.3f}'.format(acc_benign, acc_fgsm, acc_jsma_1, acc_gan))

        '''
        x_fgsm_mnist = np.load(os.path.join('data','x_fgsm_mnist.npy'))
        x_gan_mnist = np.load(os.path.join('data','x_gan_mnist.npy'))
        x_jsma_mnist_1 = np.load(os.path.join('data','x_jsma_mnist_1.npy'))
        sess.run(tf.global_variables_initializer())
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            sess.run(train_op_classifier, feed_dict={x: batch[0], label: batch[1]})
            if t % 550 == 0:
                epoch = int(t / 550)
                acc = {}
                acc['benign'] = sess.run(get_acc(x, label), feed_dict = {x: mnist.test.images, label: mnist.test.labels})
                acc['pre fgsm'] = sess.run(get_acc(x, label), feed_dict={x: x_fgsm_mnist, label: mnist.test.labels})
                acc['pre gan'] = sess.run(get_acc(x, label), feed_dict={x: x_gan_mnist, label: mnist.test.labels})
                acc['pre jsma 1'] = sess.run(get_acc(x, label), feed_dict={x: x_jsma_mnist_1, label: mnist.test.labels[0:100,]})
                print(epoch, acc)
        '''
        sess.close()
        return
        # NOTE(review): unreachable (after return) — kept from the original as
        # a manual toggle for full-test-set JSMA generation.
        # jsma
        jsma_params = {'theta': 1., 'gamma': 0.1,'nb_classes': 10, 'clip_min': 0.,'clip_max': 1., 'targets': y,\
            'y_val': y_target_test}
        x_jsma_mnist = jsma.generate_np(mnist.test.images, **jsma_params)
        np.save(os.path.join('data','x_jsma_mnist.npy'), x_jsma_mnist)
def main():
    """Train three independent MNIST classifiers and measure their accuracy
    under four FGSM variants: baseline, random-start, iterative, and targeted
    (toward class 0).  Each adversarial test set is saved under data/mnist/.

    NOTE(review): relies on module-level globals/helpers defined elsewhere in
    this file (model_1..model_3, softmax_loss, accuracy, attack, input_data,
    random_seed, learning_rate, steps, batch_size, GradientDescentOptimizer).
    """
    sess = tf.Session()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    tf.set_random_seed(random_seed)

    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label
    # NOTE(review): the original also defined x1/x2/y_target placeholders that
    # were never consumed by the graph (the y_target placeholder was shadowed
    # by a numpy array below); they are removed here.

    ys = [model_1(x), model_2(x), model_3(x)]
    losses = [softmax_loss(label, y) for y in ys]

    all_vars = tf.trainable_variables()
    var_lists = [[v for v in all_vars if 'model_%d' % i in v.name]
                 for i in (1, 2, 3)]

    train_ops = [
        GradientDescentOptimizer(learning_rate=learning_rate).minimize(
            l, var_list=vl, global_step=global_step)
        for l, vl in zip(losses, var_lists)
    ]

    x_fgsms = [attack.fgsm(x, y, eps=0.3, clip_min=0, clip_max=1) for y in ys]

    # Build evaluation/gradient ops ONCE.  (Bug fix: the original called
    # accuracy(...) and tf.gradients(...) inside the session loops, adding
    # new nodes to the graph on every call.)
    acc_ops = [accuracy(label, y) for y in ys]
    grad_ops = [tf.gradients(l, x)[0] for l in losses]

    # Bug fix: construct the Saver only after every model variable exists
    # (the original built it before the models, so it covered global_step
    # alone).  Currently unused; kept for checkpointing.
    saver = tf.train.Saver()

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    y_target = np.zeros((10000, 10), dtype=np.float32)  # targeted labels: class 0
    y_target[:, 0] = 1.0

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        test_fd = {x: mnist.test.images, label: mnist.test.labels}

        print('train models')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            train_fd = {x: batch[0], label: batch[1]}
            for op in train_ops:
                sess.run(op, feed_dict=train_fd)
            if t % 550 == 0:  # once per epoch (550 batches of 100)
                epoch = int(t / 550)
                accs = [sess.run(a, feed_dict=test_fd) for a in acc_ops]
                print(epoch, *accs)

        print('generate adv samples')

        def _save_and_eval(tag, data_list):
            # Save each model's adversarial batch under data/mnist/ and
            # return that model's accuracy on its own adversarial data.
            accs = []
            for i, (data, a) in enumerate(zip(data_list, acc_ops), start=1):
                np.save(
                    os.path.join('data', 'mnist',
                                 'x_fgsm%s_%d_data.npy' % (tag, i)), data)
                accs.append(
                    sess.run(a, feed_dict={x: data, label: mnist.test.labels}))
            return accs

        # Baseline FGSM on the clean test set.
        fgsm_data = [sess.run(xf, feed_dict=test_fd) for xf in x_fgsms]
        acc_fgsm = _save_and_eval('', fgsm_data)

        # Random-start FGSM: add Gaussian noise to the inputs first.
        x_perturb_data = mnist.test.images + np.random.normal(
            loc=0.0, scale=0.1, size=[10000, 784])
        x_perturb_data = np.clip(x_perturb_data, 0, 1)
        rd_fd = {x: x_perturb_data, label: mnist.test.labels}
        rd_data = [sess.run(xf, feed_dict=rd_fd) for xf in x_fgsms]
        acc_fgsm_rd = _save_and_eval('_rd', rd_data)

        # Iterative FGSM: 10 gradient-sign steps of size 0.01, clipped to [0, 1].
        it_data = [np.copy(mnist.test.images) for _ in ys]
        for _ in range(10):
            for j, g_op in enumerate(grad_ops):
                g = sess.run(g_op,
                             feed_dict={
                                 x: it_data[j],
                                 label: mnist.test.labels
                             })
                it_data[j] = np.clip(it_data[j] + np.sign(g) * 0.01, 0, 1)
        acc_fgsm_it = _save_and_eval('_it', it_data)

        # Targeted FGSM: step AGAINST the gradient of the loss w.r.t. the
        # all-class-0 target labels (descend toward the target class).
        tg_data = []
        for g_op in grad_ops:
            g = sess.run(g_op,
                         feed_dict={
                             x: mnist.test.images,
                             label: y_target
                         })
            tg_data.append(np.clip(mnist.test.images - np.sign(g) * 0.3, 0, 1))
        acc_fgsm_tg = _save_and_eval('_tg', tg_data)

        print('Accuracy fgsm bl: {:.4f}, {:.4f}, {:.4f}'.format(*acc_fgsm))
        print('Accuracy fgsm rd: {:.4f}, {:.4f}, {:.4f}'.format(*acc_fgsm_rd))
        print('Accuracy fgsm it: {:.4f}, {:.4f}, {:.4f}'.format(*acc_fgsm_it))
        print('Accuracy fgsm tg: {:.4f}, {:.4f}, {:.4f}'.format(*acc_fgsm_tg))

    return
Example #6
    'learning_rate': 0.001,
}
# Train the MNIST model.  sess, x, y, predictions, X_train/Y_train and
# train_params are defined earlier in this (tutorial-style) script;
# model_train here is the cleverhans training helper.
model_train(sess, x, y, predictions, X_train, Y_train, args=train_params)


# Evaluate the MNIST model on clean (legitimate) test data
eval_params = {'batch_size': 128}
accuracy = model_eval(sess, x, y, predictions, X_test, Y_test, args=eval_params)
print('Test accuracy on legitimate test examples: ' + str(accuracy))
# Test accuracy on legitimate test examples: 0.9888


# Craft adversarial examples using the Fast Gradient Sign Method.
# fgsm builds a symbolic adversarial-example tensor; batch_eval then
# materializes it over the whole test set in batches.
from cleverhans.attacks_tf import fgsm
from cleverhans.utils_tf import batch_eval
adv_x = fgsm(x, predictions, eps=0.3)
X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
accuracy = model_eval(sess, x, y, predictions, X_test_adv, Y_test, args=eval_params)
print('Test accuracy on adversarial examples: ' + str(accuracy))
# Test accuracy on adversarial examples: 0.0837


# Adversarial training: a second model whose training loss also sees its own
# FGSM adversarial examples (predictions_adv).
model_2 = cnn_model()
predictions_2 = model_2(x)
adv_x_2 = fgsm(x, predictions_2, eps=0.3)
predictions_2_adv = model_2(adv_x_2)
model_train(sess, x, y, predictions_2, X_train, Y_train, predictions_adv=predictions_2_adv, args=train_params)


# Evaluate the accuracy on legitimate examples
Example #7
def main():
    """Train an MNIST classifier through heavily noised logits and, after
    every epoch, measure test accuracy and average distortion under several
    attacks: a precomputed FGSM set loaded from disk, white-box FGSM, FGSM
    from a random start, iterative FGSM, and iterative FGSM from a random
    start.
    """
    random_seed = 1024
    train_size, test_size = 55000, 10000
    batch_size = 100
    learning_rate = 0.05
    epochs = 10
    steps = epochs * 550  # 55000 train images / batch of 100 = 550 steps/epoch
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.InteractiveSession(config=config)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    tf.set_random_seed(random_seed)

    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label

    # Training-time input noise, clipped back into the valid pixel range.
    x_perturb = x + tf.random_normal(
        shape=tf.shape(x), mean=0.0, stddev=0.5, dtype=tf.float32)
    x_perturb = tf.clip_by_value(x_perturb, 0, 1)

    y = classifier(x)
    y_ = classifier(x_perturb)
    # Very large logit noise; the training loss below is taken on y_perturb,
    # so the classifier is optimized through this noisy output.
    y_perturb = y_ + tf.random_normal(
        shape=tf.shape(y), mean=0.0, stddev=300.0, dtype=tf.float32)

    # fgsm attack graph (eps=0.2, pixels clipped to [0, 1])
    x_fgsm = attack.fgsm(x, y, eps=0.2, clip_min=0, clip_max=1)
    y_fgsm = classifier(x_fgsm)

    # FGSM computed from a randomly perturbed starting point.
    x_perturb2 = x + tf.random_normal(
        shape=tf.shape(x), mean=0.0, stddev=0.2, dtype=tf.float32)
    y_2 = classifier(x_perturb2)
    x_fgsm_rd = attack.fgsm(x_perturb2, y_2, eps=0.2, clip_min=0, clip_max=1)
    y_fgsm_rd = classifier(x_fgsm_rd)

    loss_cls = softmax_loss(label, y_perturb)

    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss_cls, var_list = c_vars, global_step = global_step)

    x1 = tf.placeholder(tf.float32, [None, 784])
    x2 = tf.placeholder(tf.float32, [None, 784])

    # Build all evaluation ops ONCE, before the training loop.  The original
    # code called get_acc(...), avg_dist(...) and tf.gradients(...) inside
    # the loop; each such call adds new ops to the default graph, so the
    # graph (and per-epoch evaluation time / memory) grew without bound.
    # NOTE(review): this assumes get_acc/avg_dist are pure op-builders that
    # return tensors — consistent with the original sess.run(get_acc(...))
    # usage, but confirm against their definitions.
    acc_op = get_acc(x, label)
    dist_op = avg_dist(x1, x2)
    grad_op = tf.gradients(loss_cls, x)[0]

    # train
    saver = tf.train.Saver()
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Precomputed FGSM adversarial test set (transfer attack from disk).
    x_pre_fgsm_data = np.load(os.path.join('data', 'x_fgsm_mnist.npy'))

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1]}
            sess.run(train_op_classifier, feed_dict=f_dict)
            if t % 550 == 0:  # end of an epoch: run the full evaluation suite
                epoch = int(t / 550)
                acc = {}
                dist = {}
                x_data = mnist.test.images
                label_data = mnist.test.labels

                # Clean test accuracy.
                acc['benign'] = sess.run(acc_op,
                                         feed_dict={
                                             x: mnist.test.images,
                                             label: mnist.test.labels
                                         })
                # Precomputed (transfer) adversarial examples.
                acc['pre'] = sess.run(acc_op,
                                      feed_dict={
                                          x: x_pre_fgsm_data,
                                          label: mnist.test.labels
                                      })
                dist['pre'] = sess.run(dist_op,
                                       feed_dict={
                                           x1: mnist.test.images,
                                           x2: x_pre_fgsm_data
                                       })

                # White-box FGSM against the current weights.
                x_fgsm_data = sess.run(x_fgsm,
                                       feed_dict={
                                           x: mnist.test.images,
                                           label: mnist.test.labels
                                       })
                acc['fgsm'] = sess.run(acc_op,
                                       feed_dict={
                                           x: x_fgsm_data,
                                           label: mnist.test.labels
                                       })
                dist['fgsm'] = sess.run(dist_op,
                                        feed_dict={
                                            x1: mnist.test.images,
                                            x2: x_fgsm_data
                                        })

                # FGSM from a random start: the noise is added in numpy and
                # the perturbed images are fed through the same x_fgsm graph.
                x_perturb_data = x_data + np.random.normal(
                    loc=0.0, scale=0.2, size=[10000, 784])
                x_perturb_data = np.clip(x_perturb_data, 0, 1)
                x_rd_fgsm_data = sess.run(x_fgsm,
                                          feed_dict={
                                              x: x_perturb_data,
                                              label: mnist.test.labels
                                          })
                acc['rd'] = sess.run(acc_op,
                                     feed_dict={
                                         x: x_rd_fgsm_data,
                                         label: mnist.test.labels
                                     })
                dist['rd'] = sess.run(dist_op,
                                      feed_dict={
                                          x1: mnist.test.images,
                                          x2: x_rd_fgsm_data
                                      })

                # Iterative FGSM: 10 sign-gradient steps of size 0.01,
                # clipping to [0, 1] after every step.
                x_it_fgsm_data = np.copy(mnist.test.images)
                for _ in range(10):
                    grad = sess.run(grad_op,
                                    feed_dict={
                                        x: x_it_fgsm_data,
                                        label: mnist.test.labels
                                    })
                    x_it_fgsm_data += np.sign(grad) * 0.01
                    x_it_fgsm_data = np.clip(x_it_fgsm_data, 0, 1)
                acc['it'] = sess.run(acc_op,
                                     feed_dict={
                                         x: x_it_fgsm_data,
                                         label: mnist.test.labels
                                     })
                dist['it'] = sess.run(dist_op,
                                      feed_dict={
                                          x1: mnist.test.images,
                                          x2: x_it_fgsm_data
                                      })

                # Iterative FGSM from a random start.
                x_it_rd_fgsm_data = np.copy(
                    mnist.test.images) + np.random.normal(
                        loc=0.0, scale=0.2, size=[10000, 784])
                x_it_rd_fgsm_data = np.clip(x_it_rd_fgsm_data, 0, 1)
                for _ in range(10):
                    grad = sess.run(grad_op,
                                    feed_dict={
                                        x: x_it_rd_fgsm_data,
                                        label: mnist.test.labels
                                    })
                    x_it_rd_fgsm_data += np.sign(grad) * 0.01
                    x_it_rd_fgsm_data = np.clip(x_it_rd_fgsm_data, 0, 1)
                acc['it rd'] = sess.run(acc_op,
                                        feed_dict={
                                            x: x_it_rd_fgsm_data,
                                            label: mnist.test.labels
                                        })
                dist['it rd'] = sess.run(dist_op,
                                         feed_dict={
                                             x1: mnist.test.images,
                                             x2: x_it_rd_fgsm_data
                                         })

                print(epoch)
                print(acc)
                print(dist)
예제 #8
0
def model_train(para):
    """Train a fully connected MNIST classifier whose input is perturbed by
    Gaussian noise calibrated from a privacy budget, and report clean and
    adversarial accuracy during training.

    para: dict with keys 'eps', 'delta' (fed to compute_sigma) and 'sens';
        the input-noise stddev is sigma * para['sens'].
    """
    # Noise scale derived from the (eps, delta) budget, scaled by sensitivity.
    sigma = compute_sigma(para['eps'], para['delta'])
    std = sigma * para['sens']

    sess = tf.Session()
    tf.set_random_seed(random_seed)
    n = len(layers)  # layers is a module-level list of layer widths
    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label

    # One weight/bias pair per layer; index 0 is unused (it is the input).
    w, b = [0 for i in range(n)], [0 for i in range(n)]
    for i in range(1, n):
        w[i] = weight_variable([layers[i - 1], layers[i]])
        b[i] = bias_variable([layers[i]])

    # noisy model: Gaussian noise (stddev=std) added to the raw input
    z, h = [0 for i in range(n)], [0 for i in range(n)]
    h[0] = x
    h[0] = h[0] + tf.random_normal(
        shape=tf.shape(h[0]), mean=0.0, stddev=std, dtype=tf.float32)
    for i in range(1, n):
        z[i] = tf.matmul(h[i - 1], w[i]) + b[i]
        if i < n - 1:
            h[i] = tf.nn.relu(z[i])  # hidden layers: ReLU
        else:
            h[i] = z[i]  # output layer: raw logits
    y = h[n - 1]
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=y))

    # noiseless model sharing the same weights (y2 is built but the
    # evaluation below uses the noisy y)
    z2, h2 = [0 for i in range(n)], [0 for i in range(n)]
    h2[0] = x
    for i in range(1, n):
        z2[i] = tf.matmul(h2[i - 1], w[i]) + b[i]
        if i < n - 1:
            h2[i] = tf.nn.relu(z2[i])
        else:
            h2[i] = z2[i]
    y2 = h2[n - 1]

    # FGSM adversarial examples crafted against the noisy model's logits.
    x_adv = attack.fgsm(x, y, eps=0.3, clip_min=0, clip_max=1)

    # gradient descent: gradients are taken explicitly per variable and
    # applied via apply_gradients rather than a single minimize() call
    gw, gb = [0 for i in range(n)], [0 for i in range(n)]
    for i in range(1, n):
        gw[i] = tf.gradients(loss, w[i])[0]
        gb[i] = tf.gradients(loss, b[i])[0]
    opt = GradientDescentOptimizer(learning_rate=learning_rate)
    gradients = []
    for i in range(1, n):
        gradients.append((gw[i], w[i]))
        gradients.append((gb[i], b[i]))
    train_step = opt.apply_gradients(gradients)

    # evaluation: accuracy of the noisy model
    acc = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(label, 1)), tf.float32))

    # data: MNIST plus a precomputed FGSM adversarial test set from disk
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    x_adv_mnist_fsgm = np.load(os.path.join('data', 'x_adv_mnist_fsgm.npy'))

    print('sigma: {:.3f}, std: {:.3f}'.format(sigma, std))
    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        for t in range(steps):
            batch = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: batch[0], label: batch[1]})
            # Evaluate every 1/sample_rate steps and at the final step.
            if t % int(1 / sample_rate) == 0 or t == steps - 1:
                if t < steps - 1:
                    epoch = int(t / int(1 / sample_rate))
                else:
                    epoch = epochs

                # Fresh white-box FGSM examples for the current weights.
                x_adv_sample = sess.run(x_adv,
                                        feed_dict={
                                            x: mnist.test.images,
                                            label: mnist.test.labels
                                        })
                acc_benign = sess.run(acc,
                                      feed_dict={
                                          x: mnist.test.images,
                                          label: mnist.test.labels
                                      })
                acc_adv = sess.run(acc,
                                   feed_dict={
                                       x: x_adv_sample,
                                       label: mnist.test.labels
                                   })
                # Accuracy on the precomputed (transfer) adversarial set.
                acc_pre_adv = sess.run(acc,
                                       feed_dict={
                                           x: x_adv_mnist_fsgm,
                                           label: mnist.test.labels
                                       })

                print(epoch, acc_benign, acc_adv, acc_pre_adv)
예제 #9
0
def main():
    """Train an MNIST classifier (a GAN-based attacker phase is present but
    disabled) and report accuracy on clean data, on precomputed FGSM / GAN /
    JSMA examples loaded from disk, and on freshly generated FGSM,
    random-start FGSM, and targeted JSMA examples.
    """
    random_seed = 1024
    train_size, test_size = 55000, 10000
    batch_size = 100
    learning_rate = 0.05
    epochs = 5
    steps = epochs * 550  # 550 batches of 100 per epoch over 55k images
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    sess = tf.InteractiveSession(config=config)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    tf.set_random_seed(random_seed)

    x = tf.placeholder(tf.float32, [None, 784])  # input
    label = tf.placeholder(tf.float32, [None, 10])  # true label

    noise = tf.placeholder(tf.float32, [None, 100])  # noise vector
    y_target = tf.placeholder(tf.float32, [None, 10])  # target label

    # Random starting point for the randomized FGSM attack.
    x_perturb = x + tf.random_normal(
        shape=tf.shape(x), mean=0.0, stddev=0.5, dtype=tf.float32)
    x_perturb = tf.clip_by_value(x_perturb, 0, 1)

    # Placeholders for pairwise image comparisons (not used in the active
    # code path below).
    x1 = tf.placeholder(tf.float32, [None, 784])
    x2 = tf.placeholder(tf.float32, [None, 784])

    # classifier_n: presumably the noise-injected variant of classifier —
    # confirm against its definition; the training loss is taken on y_n.
    y_n = classifier_n(x)
    y = classifier(x)
    y_perturb = classifier(x_perturb)

    # gan: generator crafts inputs intended to fool the classifier
    x_gan = generator(x, noise)
    y_gan = classifier(x_gan)

    loss_cls = softmax_loss(label, y_n)  #+ softmax_loss(label, y_gan)
    # Generator maximizes the classifier's loss (hence the negation).
    loss_gan = -softmax_loss(label, y_gan)

    # Separate variable lists so each optimizer updates only its own network.
    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    g_vars = [var for var in all_vars if 'generator' in var.name]
    train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss_cls, var_list = c_vars, global_step = global_step)
    train_op_generator = GradientDescentOptimizer(learning_rate = 0.05) \
        .minimize(loss_gan, var_list = g_vars, global_step = global_step)

    #fgsm
    x_fgsm = attack.fgsm(x, y, eps=0.2, clip_min=0, clip_max=1)
    y_fgsm = classifier(x_fgsm)
    # random fgsm
    x_fgsm_rd = attack.fgsm(x_perturb,
                            y_perturb,
                            eps=0.2,
                            clip_min=0,
                            clip_max=1)
    y_fgsm_rd = classifier(x_fgsm_rd)
    # jsma
    jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)

    # train
    saver = tf.train.Saver()
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Precomputed adversarial test sets (crafted elsewhere, loaded from disk).
    x_fgsm_mnist = np.load(os.path.join('data', 'x_fgsm_mnist.npy'))
    x_gan_mnist = np.load(os.path.join('data', 'x_gan_mnist.npy'))
    x_jsma_mnist_1 = np.load(os.path.join('data', 'x_jsma_mnist_1.npy'))

    # Targeted-attack labels: every example is pushed toward class 0.
    y_target_batch = np.zeros((100, 10), dtype=np.float32)
    y_target_batch[:, 0] = 1.0
    y_target_test = np.zeros((10000, 10), dtype=np.float32)
    y_target_test[:, 0] = 1.0

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        acc = {}
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            noise_d = sample_Z(batch_size, 100)
            f_dict = {
                x: batch[0],
                label: batch[1],
                noise: noise_d,
                y_target: y_target_batch
            }
            sess.run(train_op_classifier, feed_dict=f_dict)
            #for j in range(1):
            #sess.run(train_op_generator, feed_dict=f_dict)
            if t % 550 == 0:  # end of an epoch: evaluate
                epoch = int(t / 550)
                acc['benign'] = sess.run(get_acc(x, label),
                                         feed_dict={
                                             x: mnist.test.images,
                                             label: mnist.test.labels
                                         })
                acc['pre fgsm'] = sess.run(get_acc(x, label),
                                           feed_dict={
                                               x: x_fgsm_mnist,
                                               label: mnist.test.labels
                                           })
                acc['pre gan'] = sess.run(get_acc(x, label),
                                          feed_dict={
                                              x: x_gan_mnist,
                                              label: mnist.test.labels
                                          })
                # The precomputed JSMA set covers only the first 100 images.
                acc['pre jsma 1'] = sess.run(get_acc(x, label),
                                             feed_dict={
                                                 x: x_jsma_mnist_1,
                                                 label:
                                                 mnist.test.labels[0:100, ]
                                             })

                # Fresh white-box FGSM against the current weights.
                x_fgsm_d = sess.run(x_fgsm,
                                    feed_dict={
                                        x: mnist.test.images,
                                        label: mnist.test.labels
                                    })
                acc['fgsm'] = sess.run(get_acc(x, label),
                                       feed_dict={
                                           x: x_fgsm_d,
                                           label: mnist.test.labels
                                       })

                # FGSM from a random start (x_perturb is sampled in-graph).
                x_fgsm_rd_d = sess.run(x_fgsm_rd,
                                       feed_dict={
                                           x: mnist.test.images,
                                           label: mnist.test.labels
                                       })
                acc['fgsm_rd'] = sess.run(get_acc(x, label),
                                          feed_dict={
                                              x: x_fgsm_rd_d,
                                              label: mnist.test.labels
                                          })

                print(epoch, acc)
        '''
        print('train gan')
        for t in range(1, 550 * 10 + 1):
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1], noise: sample_Z(batch_size, 100), y_target: y_target_batch}
            sess.run(train_op_generator, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
                batch = mnist.test.next_batch(batch_size)
                f_dict = {x: batch[0], label: batch[1], noise: sample_Z(batch_size, 100), y_target: y_target_batch}
                x_gan_data = sess.run(x_gan, feed_dict=f_dict)
                acc_gan = sess.run(get_acc(x, label), feed_dict={x: x_gan_data, label: batch[1]})
                print(epoch, acc_gan)

        x_fgsm_d = sess.run(x_fgsm, feed_dict = {x: mnist.test.images, label: mnist.test.labels})
        acc['fgsm'] = sess.run(get_acc(x, label), feed_dict={x: x_fgsm_d, label: mnist.test.labels})

        x_gan_d = sess.run(x_gan, feed_dict={x: mnist.test.images ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})
        acc['gan'] = sess.run(get_acc(x, label), feed_dict={x: x_gan_d ,label: mnist.test.labels\
            , noise: sample_Z(10000, 100), y_target: y_target_test})
        '''
        # Targeted JSMA on the first 100 test images (target class 0).
        jsma_params = {'theta': 1., 'gamma': 0.1,'nb_classes': 10, 'clip_min': 0.,'clip_max': 1., 'targets': y,\
            'y_val': y_target_batch}
        x_jsma_1_d = jsma.generate_np(mnist.test.images[0:100, ],
                                      **jsma_params)
        acc['jsma 1'] = sess.run(get_acc(x, label),
                                 feed_dict={
                                     x: x_jsma_1_d,
                                     label: mnist.test.labels[0:100, ]
                                 })

        print(acc['jsma 1'])
예제 #10
0
# Train the baseline (legitimate) model on the regulator/sequence data.
model_train(sess_legit, x_reg, x_seq, y, predictions, 
	train['reg'], train['seq'], train['expr'], 
	val['reg'], val['seq'], val['expr'], args=train_params)

# Predict expression on the held-out test set and plot predicted vs observed.
pred=model.predict({'seq_input':test['seq'],'reg_input':test['reg']},batch_size=100,verbose=1)
plt.scatter(pred,test['expr'])
plt.savefig("%s/pred_vs_obs.legit.png"%(fig_dir))
output=np.column_stack((test['expr'], pred[:,0]))
np.savetxt("%s/prediction.legit.txt"%(out_dir), output,delimiter='\t')


# Adversarial training
# A second model is trained on FGSM-perturbed sequence inputs (eps=0.3);
# only the sequence branch is attacked, the regulator input stays clean.
sess_adv=tf.Session()
model_2=concatenation_model(num_reg,seq_length)
predictions_2 = model_2([x_reg,x_seq])
adv_x_seq_2 = fgsm(x_seq, predictions_2, eps=0.3)
predictions_2_adv = model_2([x_reg,adv_x_seq_2])
train_params = {
	'nb_epochs': 50,
	'batch_size': 100,
	'learning_rate': 0.01,
	'train_dir': log_dir,
	'filename': 'model_adv'
}

model_train(sess_adv, x_reg, x_seq, y, predictions_2, 
	train['reg'], train['seq'], train['expr'], 
	val['reg'], val['seq'], val['expr'], 
	predictions_adv=predictions_2_adv, args=train_params)

예제 #11
0
    saved_path = dirs.SAVED_MODELS
    if parseval:
        saved_path += '/wrn-28-10-p-t--2018-01-24-21-18/ResNet'  # Parseval
    else:
        saved_path += '/wrn-28-10-t--2018-01-23-19-13/ResNet'  # vanilla
    model.load_state(saved_path)

    cost, ev = model.test(ds_test)
    accuracies = [ev['accuracy']]
    for eps in epss[1:]:
        print("Creating adversarial examples...")
        clip_max = (255 - np.max(Cifar10Loader.mean)) / np.max(
            Cifar10Loader.std)
        n_fgsm = fgsm(model.nodes.input,
                      model.nodes.probs,
                      eps=eps,
                      clip_min=-clip_max,
                      clip_max=clip_max)
        images_adv, = batch_eval(model._sess, [model.nodes.input], [n_fgsm],
                                 [ds_test.images[:model.batch_size * 64]],
                                 args={'batch_size': model.batch_size},
                                 feed={model._is_training: False})
        adv_ds_test = Dataset(images_adv, ds_test.labels, ds_test.class_count)
        cost, ev = model.test(adv_ds_test)
        accuracies.append(ev['accuracy'])
    accuracieses.append(accuracies)
    print(accuracies)


def plot(epss, curves, names):
    plt.figure()
예제 #12
0
def train(hps, data):
    """Training loop for a ResNet/VGG CIFAR-style classifier with optional
    adversarial training (FGSM with randomized eps, or BIM).

    hps: hyperparameter namedtuple (num_classes, batch_size, ...).
    data: iterator-like object with next(batch_size) -> (images, labels).
    """
    images = tf.placeholder(tf.float32, shape=(None, FLAGS.image_size, FLAGS.image_size, FLAGS.channels), name="images")
    labels = tf.placeholder(tf.int64, shape=(None), name="labels")
    labels_onehot = tf.one_hot(labels, depth=hps.num_classes, dtype=tf.float32, name="labels_onehot")
    if FLAGS.label_smooth:
        labels_onehot = label_smooth(labels_onehot)

    lrn_rate = tf.placeholder(tf.float32, shape=(), name="lrn_rate")
    tf.logging.info(json.dumps(vars(FLAGS)))
    tf.logging.info(json.dumps(hps._asdict()))

    # Data augmentation: random horizontal flips inside the graph.
    flipped_images = random_flip_left_right(images)

    # tf.make_template shares variables across the multiple net(...) calls
    # below (clean, adversarial, and combined forward passes).
    net = tf.make_template('net', resnet_template, hps=hps) if FLAGS.model == 'resnet' else \
        tf.make_template('net', vgg_template, hps=hps)

    truth = labels
    # Adversarial modes build the attack against inference-mode (training=False)
    # statistics; plain training uses training=True.
    if FLAGS.adversarial or FLAGS.adversarial_BIM:
        logits = net(flipped_images, training=False)
    else:
        logits = net(flipped_images, training=True)
    probs = tf.nn.softmax(logits)

    predictions = tf.argmax(logits, axis=1)
    precision = tf.reduce_mean(tf.to_float(tf.equal(predictions, truth)))

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_onehot))

    # L2 weight decay over all trainable variables of the net.
    weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net')
    weight_norm = tf.add_n([tf.nn.l2_loss(v) for v in weights])
    cost = cost + 0.0005 * weight_norm

    # UPDATE_OPS dependency keeps batch-norm statistics updating.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = tf.train.MomentumOptimizer(learning_rate=lrn_rate, momentum=0.9).minimize(cost)

    if FLAGS.adversarial or FLAGS.adversarial_BIM:
        # Per-example eps drawn from |N(0, adv_std)|, broadcast over H/W/C.
        eps = tf.abs(tf.truncated_normal(shape=(tf.shape(images)[0],), mean=0, stddev=FLAGS.adv_std))
        eps = eps[:, None, None, None]
        adv_x = fgsm(flipped_images, probs, eps=eps, clip_min=0.0, clip_max=255.0)
        # fgm with the true labels supplied (no label leaking); eps=1 per BIM step.
        adv_x_leak = fgm(flipped_images, probs, y=labels_onehot, eps=np.asarray([1])[:, None, None, None],
                         clip_min=0.0, clip_max=255.0)

        adv_logits = net(adv_x, training=False)
        adv_pred = tf.argmax(adv_logits, axis=1)
        adv_precision = tf.reduce_mean(tf.to_float(tf.equal(adv_pred, truth)))

        adv_logits_leak = net(adv_x_leak, training=False)
        adv_pred_leak = tf.argmax(adv_logits_leak, axis=1)
        adv_precision_leak = tf.reduce_mean(tf.to_float(tf.equal(adv_pred_leak, truth)))

        # Adversarial training: first half of the batch clean, second half
        # adversarial, with the clean term weighted higher (2.0 vs 0.6).
        num_normal = hps.batch_size // 2
        combined_images = tf.concat([flipped_images[:num_normal], images[num_normal:]], axis=0)
        com_logits = net(combined_images, training=True)

        normal_cost = 2.0 / 1.3 * tf.nn.softmax_cross_entropy_with_logits(logits=com_logits[:num_normal],
                                                                          labels=labels_onehot[:num_normal])
        adv_cost = 0.6 / 1.3 * tf.nn.softmax_cross_entropy_with_logits(logits=com_logits[num_normal:],
                                                                       labels=labels_onehot[num_normal:])

        combined_cost = tf.reduce_mean(tf.concat([normal_cost, adv_cost], axis=0)) + 0.0005 * weight_norm

        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op_adv = tf.train.MomentumOptimizer(learning_rate=lrn_rate, momentum=0.9).minimize(combined_cost)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(max_to_keep=3)
        save_path, save_path_ckpt = get_weights_path()
        state = tf.train.get_checkpoint_state(save_path)
        if state and state.model_checkpoint_path:
            # A checkpoint already exists: either wipe it or resume from it.
            ans = verify("Warning: model already trained. Delete files and re-train? (y/n)")
            if ans:
                shutil.rmtree(save_path)
                os.makedirs(save_path)
            else:
                saver_state = tf.train.get_checkpoint_state(save_path)
                saver.restore(sess, saver_state.model_checkpoint_path)
                # raise FileExistsError("Model weight already exists")
        else:
            os.makedirs(save_path, exist_ok=True)

        # Persist the exact hyperparameters next to the checkpoints.
        hps_path = os.path.join(save_path, 'hps.txt')
        with open(hps_path, 'w') as fout:
            fout.write(json.dumps(vars(FLAGS)))
            fout.write(json.dumps(hps._asdict()))

        for iter in range(FLAGS.maxiter):
            # NOTE(review): on StopIteration only a log line is emitted and
            # (x, y) from the previous iteration are silently reused; on the
            # very first iteration they would be undefined — confirm that
            # data.next restarts internally.
            try:
                x, y = data.next(hps.batch_size)
            except StopIteration:
                tf.logging.info("New epoch!")

            # Stepwise learning-rate schedule.
            if iter < 40000:
                lr = 0.1
            elif iter < 60000:
                lr = 0.01
            elif iter < 80000:
                lr = 0.001
            else:
                lr = 0.0001

            if not FLAGS.adversarial and not FLAGS.adversarial_BIM:
                # Plain training on clean images.
                _, acc = sess.run([train_op, precision], feed_dict={
                    images: x,
                    labels: y,
                    lrn_rate: lr
                })
                tf.logging.info("Iter: {}, Precision: {:.6f}".format(iter + 1, acc))
            elif FLAGS.adversarial:
                # One-shot FGSM: craft adversarial images, then train on the
                # half-clean / half-adversarial combined batch.
                adv_images, acc, acc_adv = sess.run([adv_x, precision, adv_precision], feed_dict={
                    images: x,
                    labels: y,
                })
                combined_batch = np.concatenate([x[:num_normal], adv_images[num_normal:]], axis=0)
                _, com_loss = sess.run([train_op_adv, combined_cost], feed_dict={
                    images: combined_batch,
                    labels: y,
                    lrn_rate: lr
                })
                tf.logging.info("Iter: {}, Precision: {:.6f}, Adv precision: {:.6f}, Combined loss: {:.6f}"
                                .format(iter + 1, acc, acc_adv, com_loss))

            elif FLAGS.adversarial_BIM:
                # Basic Iterative Method: repeat the eps=1 fgm step a number
                # of times derived from a sampled eps (min(eps+4, 1.25*eps)).
                BIM_eps = np.abs(truncnorm.rvs(a=-2., b=2.) * FLAGS.adv_std)
                attack_iter = int(min(BIM_eps + 4, 1.25 * BIM_eps))
                adv_images = np.copy(x)
                for i in range(attack_iter):
                    adv_images, acc, acc_adv = sess.run([adv_x_leak, precision, adv_precision_leak], feed_dict={
                        images: adv_images,
                        labels: y,
                    })

                combined_batch = np.concatenate([x[:num_normal], adv_images[num_normal:]], axis=0)
                _, com_loss = sess.run([train_op_adv, combined_cost], feed_dict={
                    images: combined_batch,
                    labels: y,
                    lrn_rate: lr
                })
                tf.logging.info("Iter: {}, Precision: {:.6f}, Adv precision: {:.6f}, Combined loss: {:.6f}"
                                .format(iter + 1, acc, acc_adv, com_loss))

            # Periodic checkpointing.
            if (iter + 1) % 5000 == 0:
                saver.save(sess, save_path_ckpt, global_step=iter + 1)
                tf.logging.info("Model saved! Path: " + save_path)
예제 #13
0
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile):
    """Train a differentially-private CNN on MNIST with adversarial training.

    Builds a DP auto-encoder pretraining step and the main classifier graph,
    crafts adversarial training batches (I-FGSM / MIM / Madry via cleverhans)
    on every step, and periodically evaluates benign and adversarial accuracy
    plus PixelDP-style certified robustness, writing results to *logfile*.

    Args:
        alpha: weight of the adversarial term in the combined Taylor loss.
        eps2_ratio: ratio used when redistributing the epsilon2 privacy budget.
        gen_ratio: multiplier applied to the first-hidden-layer epsilon2.
        fgsm_eps: perturbation budget used by the evaluation attacks.
        LR: Adam learning rate for both pretraining and training.
        logfile: open, writable file object for progress/accuracy logs.
    """
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio))
    #############################
    ##Hyper-parameter Setting####
    #############################
    hk = 256; #number of hidden units at the last layer
    Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer
    Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer
    Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer
    D = 50000; #size of the dataset
    L = 2499; #batch size
    image_size = 28;
    padding = 4;
    #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units
    #gen_ratio = 1
    epsilon1 = 0.0; #0.175; #epsilon for dpLRP
    epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer
    epsilon3 = 0.1*(1); #epsilon for the last hidden layer
    total_eps = epsilon1 + epsilon2 + epsilon3
    print(total_eps)
    uncert = 0.1; #uncertainty modeling at the output layer
    infl = 1; #inflation rate in the privacy budget redistribution
    R_lowerbound = 1e-5; #lower bound of the LRP
    c = [0, 40, 50, 200] #norm bounds
    epochs = 200; #number of epochs
    preT_epochs = 50; #number of epochs
    T = int(D/L*epochs + 1); #number of steps T
    pre_T = int(D/L*preT_epochs + 1);
    step_for_epoch = int(D/L); #number of steps for one epoch
    
    broken_ratio = 1
    #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10]
    #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2)
    #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2)
    
    #fgsm_eps = 0.1
    rand_alpha = 0.05
    
    ##Robustness##
    robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2;
    ####
    
    LRPfile = os.getcwd() + '/Relevance_R_0_075.txt';
    #############################
    mnist = input_data.read_data_sets("MNIST_data/", one_hot = True);

    #############################
    ##Construct the Model########
    #############################
    #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 for the output layer#

    #Compute the 1/|L| * Delta3 for the last hidden layer#
    """eps3_ratio = Delta3_adv/Delta3_benign;
    eps3_benign = 1/(1+eps3_ratio)*(epsilon3)
    eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)"""
    loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L);
    ###
    #End Step 4#
    # Parameters Declarification
    W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]);
    b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]);

    # Largest singular value of W_conv1 is used as the layer sensitivity.
    shape     = W_conv1.get_shape().as_list()
    w_t       = tf.reshape(W_conv1, [-1, shape[-1]])
    w         = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2*(14*14 + 2)*25/(L*sensitivity)
    
    dp_epsilon=1.0 #0.1
    delta_r = fgsm_eps*(image_size**2);
    #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
    #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
    
    W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]);
    b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]);

    W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]);
    b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]);

    W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]);
    b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]);

    """scale2 = tf.Variable(tf.ones([hk]))
    beta2 = tf.Variable(tf.zeros([hk]))
    tf.add_to_collections([CONV_VARIABLES], scale2)
    tf.add_to_collections([CONV_VARIABLES], beta2)"""

    params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2]
    ###


    #Step 5: Create the model#
    noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);
    adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);

    keep_prob = tf.placeholder(tf.float32);
    x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    x_image = tf.reshape(x, [-1,image_size,image_size,1]);

    #perturbFMx = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]);

    # pretrain ###
    #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise)
    ###########

    adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]);

    #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]);

    # pretrain adv ###
    #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32)
    #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);
    FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]);
    Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h)
    Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h)
    ###########
    
    x_image += noise;
    x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature.
    
    adv_image += adv_noise;
    adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature.

    #perturbFM = np.random.laplace(0.0, scale3_benign, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10)
    perturbFM = np.reshape(perturbFM, [hk, 10]);
    
    y_conv = inference(x_image, perturbFM, hk, FM_h, params);
    softmax_y_conv = tf.nn.softmax(y_conv)
    #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T)

    #perturbFM = np.random.laplace(0.0, scale3_adv, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params);
    #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T)

    # test model: zero-scale Laplace noise, i.e. a noiseless evaluation path
    perturbFM_test = np.random.laplace(0.0, 0, hk)
    perturbFM_test = np.reshape(perturbFM_test, [hk]);
    x_test = tf.reshape(x, [-1,image_size,image_size,1]);
    y_test = inference(x_test, perturbFM_test, hk, FM_h, params);
    #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T)

    #Define a place holder for the output label#
    y_ = tf.placeholder(tf.float32, [None, 10]);
    adv_y_ = tf.placeholder(tf.float32, [None, 10]);
    #End Step 5#
    #############################

    #############################
    ##Define loss and Optimizer##
    #############################
    '''
        Computes differentially private sigmoid cross entropy given `logits`.
        
        Measures the probability error in discrete classification tasks in which each
        class is independent and not mutually exclusive.
        
        For brevity, let `x = logits`, `z = labels`.  The logistic loss is
        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
        = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x))
        = (1 - z) * x + log(1 + exp(-x))
        = x - x * z + log(1 + exp(-x))
        
        For x < 0, to avoid overflow in exp(-x), we reformulate the above
        
        x - x * z + log(1 + exp(-x))
        = log(exp(x)) - x * z + log(1 + exp(-x))
        = - x * z + log(1 + exp(x))
        
        Hence, to ensure stability and avoid overflow, the implementation uses this
        equivalent formulation
        
        max(x, 0) - x * z + log(1 + exp(-abs(x)))
        
        `logits` and `labels` must have the same type and shape. Let denote neg_abs_logits = -abs(y_conv) = -abs(h_fc1 * W_fc2). By Applying Taylor Expansion, we have:
        
        Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv)));
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2)
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2)
        = F1 + F2
        where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2
        
        To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2.
        Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data.
        Therefore, F1 should be differentially private. We need to preserve DP in F2, which reads the groundtruth label y_, as follows:
        
        By applying Funtional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2):
        
        perturbFM = np.random.laplace(0.0, scale3, hk * 10)
        perturbFM = np.reshape(perturbFM/L, [hk, 10]);
        
        where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3);
        
        To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow].
        
        Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow
    '''
    ### Taylor for benign x
    zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype)
    cond = (y_conv >= zeros)
    relu_logits = array_ops.where(cond, y_conv, zeros)
    neg_abs_logits = array_ops.where(cond, -y_conv, y_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_benign = tf.abs(y_conv - y_)

    ### Taylor for adv_x
    zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype)
    cond_adv = (y_adv_conv >= zeros_adv)
    relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv)
    neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_adv = tf.abs(y_adv_conv - adv_y_)

    ### Adversarial training loss: benign + alpha-weighted adversarial Taylor terms
    adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv)

    '''Some time, using learning rate decay can help to stablize training process. However, use this carefully, since it may affect the convergent speed.'''
    global_step = tf.Variable(0, trainable=False)
    pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
    train_var_list = tf.get_collection(CONV_VARIABLES)
    #print(pretrain_var_list)
    #print(train_var_list)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list);
        train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list);
    sess = tf.InteractiveSession();

    # Define the correct prediction and accuracy
    # This needs to be changed to "Robust Prediction"
    correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1));
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32));

    #############
    # use these to get predictions wrt to robust conditions
    """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32))
    accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask)
    #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))))
    certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))"""
    #############

    # craft adversarial samples from x for training
    dynamic_eps = tf.placeholder(tf.float32);
    emsemble_L = int(L/3)
    softmax_y = tf.nn.softmax(y_test)
    #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0)
    c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM
    x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]);

    #====================== attack =========================
    #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
    #other possible attacks:
        # ElasticNetMethod
        # FastFeatureAdversaries
        # LBFGS
        # SaliencyMapMethod
        # VirtualAdversarialMethod

    # y_test = logits (before softmax)
    # softmax_y_test = preds (probs, after softmax)
    softmax_y_test = tf.nn.softmax(y_test)

    # create saver
    saver = tf.train.Saver(tf.all_variables())
    
    sess.run(W_conv1.initializer)
    _gamma = sess.run(gamma)
    _gamma_x = Delta2/L
    epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
    print(epsilon2_update/_gamma + epsilon2_update/_gamma_x)
    print(epsilon2_update)
    _sensitivityW = sess.run(sensitivity)
    delta_h = _sensitivityW*(14**2)
    dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
    #############################
    
    iterativeStep = 100
    
    # load the most recent models
    # FIX: was os.getcwd() + './tmp/train', which builds a malformed path like
    # "/home/user./tmp/train"; use '/tmp/train' consistent with the save path below.
    _global_step = 0
    ckpt = tf.train.get_checkpoint_state(os.getcwd() + '/tmp/train')
    if ckpt and ckpt.model_checkpoint_path:
        print(ckpt.model_checkpoint_path);
        saver.restore(sess, ckpt.model_checkpoint_path)
        _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
        print('No checkpoint file found')

    start_time = time.time();

    # adv pretrain model (Auto encoder layer)
    cost = tf.reduce_sum(Enc_Layer2.cost);
    logfile.write("pretrain: \n")
    
    # define cleverhans abstract models for using cleverhans attacks
    ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)
    ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)

    # rand+fgsm
    # if attack_switch['randfgsm']:
    #     randfgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
    #     x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0)
    #     x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0))

    # define each attack method's tensor
    mu_alpha = tf.placeholder(tf.float32, [1]);
    attack_tensor_dict = {}
    # FastGradientMethod
    if attack_switch['fgsm']:
        print('creating attack tensor of FastGradientMethod')
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
        x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now
        attack_tensor_dict['fgsm'] = x_adv_test_fgsm

    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # default: eps_iter=0.05, nb_iter=10
    if attack_switch['ifgsm']:
        print('creating attack tensor of BasicIterativeMethod')
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

    # Deepfool
    if attack_switch['deepfool']:
        print('creating attack tensor of DeepFool')
        deepfool_obj = DeepFool(model=ch_model_logits, sess=sess)
        #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['deepfool'] = x_adv_test_deepfool

    # MomentumIterativeMethod
    # default: eps_iter=0.06, nb_iter=10
    if attack_switch['mim']:
        print('creating attack tensor of MomentumIterativeMethod')
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['mim'] = x_adv_test_mim

    # SPSA
    # note here the epsilon is the infinity norm instead of precent of perturb
    # Maybe exclude this method first, since it seems to have some constrain about the data value range
    if attack_switch['spsa']:
        print('creating attack tensor of SPSA')
        spsa_obj = SPSA(model=ch_model_logits, sess=sess)
        #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2)
        x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1)
        attack_tensor_dict['spsa'] = x_adv_test_spsa

    # CarliniWagnerL2
    # confidence=0 is fron their paper
    # it is said to be slow, maybe exclude first
    if attack_switch['cwl2']:
        print('creating attack tensor of CarliniWagnerL2')
        cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess)
        #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['cwl2'] = x_adv_test_cwl2

    # MadryEtAl (Projected Grdient with random init, same as rand+fgsm)
    # default: eps_iter=0.01, nb_iter=40
    if attack_switch['madry']:
        print('creating attack tensor of MadryEtAl')
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['madry'] = x_adv_test_madry

    # SpatialTransformationMethod
    # the params are pretty different from on the paper
    # so I use default
    # exclude since there's bug
    if attack_switch['stm']:
        print('creating attack tensor of SpatialTransformationMethod')
        stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2)
        x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6)
        attack_tensor_dict['stm'] = x_adv_test_stm
    #====================== attack =========================
    
    sess.run(tf.initialize_all_variables());

    ##perturb h for training
    perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
    perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);

    ##perturb h for testing
    perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32)
    perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]);

    '''for i in range(_global_step, _global_step + pre_T):
        d_eps = random.random();
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        """batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)"""

        batch_2 = mnist.train.next_batch(L);
        pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h});
        if i % int(5*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            logfile.write("step \t %d \t %g \n"%(i, cost_value))
            print(cost_value)

    pre_train_finish_time = time.time()
    print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))'''

    # train and test model with adv samples
    max_benign_acc = -1;
    max_robust_benign_acc = -1
    #max_adv_acc = -1;

    test_size = len(mnist.test.images)
    AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size);

    Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L);
    BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    last_eval_time = -1
    accum_time = 0
    accum_epoch = 0
    max_adv_acc_dict = {}
    max_robust_adv_acc_dict = {}
    #max_robust_adv_utility_dict = {}
    for atk in attack_switch.keys():
        if atk not in max_adv_acc_dict:
            max_adv_acc_dict[atk] = -1
            max_robust_adv_acc_dict[atk] = -1

    for i in range(_global_step, _global_step + T):
        # this batch is for generating adv samples
        batch = mnist.train.next_batch(emsemble_L); #Get a random batch.
        y_adv_batch = batch[1]
        #The number of epochs we print out the result. Print out the result every 5 epochs.
        if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch):
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            print(cost_value)
            
            if last_eval_time < 0:
                last_eval_time = time.time()
            #===================benign samples=====================
            predictions_form_argmax = np.zeros([test_size, 10])
            #test_bach = mnist.test.next_batch(test_size)
            softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
            for n_draws in range(0, 1):
                _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_form_argmax[j, pred] += 1;
                softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax;
            is_correct = []
            is_robust = []
            for j in range(test_size):
                is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                is_robust.append(robustness_from_argmax >= fgsm_eps)
            acc = np.sum(is_correct)*1.0/test_size
            # NOTE(review): np.sum(is_robust) can be 0, which yields nan/inf here — confirm intended.
            robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
            robust_utility = np.sum(is_robust)*1.0/test_size
            max_benign_acc = max(max_benign_acc, acc)
            max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility)
            log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)
            #===================adv samples=====================
            #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps)
            """adv_images_dict = {}
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels})
            print("Done with the generating of Adversarial samples")"""
            #===================adv samples=====================
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            for atk in attack_switch.keys():
                if atk not in adv_acc_dict:
                    adv_acc_dict[atk] = -1
                    robust_adv_acc_dict[atk] = -1
                    robust_adv_utility_dict[atk] = -1
                if attack_switch[atk]:
                    adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]})
                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([test_size, 10])
                    softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h})
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(0, 2000):
                        if n_draws % 1000 == 0:
                            print(n_draws)
                        _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                        _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                        _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                        for j in range(test_size):
                            pred = argmax_predictions[j]
                            predictions_form_argmax[j, pred] += 1;
                        softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h})
                        #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h})
                        argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    final_predictions = predictions_form_argmax;
                    is_correct = []
                    is_robust = []
                    for j in range(test_size):
                        is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                        robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                        is_robust.append(robustness_from_argmax >= fgsm_eps)
                    adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size
                    robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
                    robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size
                    ##############################
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    # added robust prediction
                    log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
                    max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk])
                    max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
            print(log_str)
            logfile.write(log_str + '\n')

            # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc))
            # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc));

            # estimate end time
            """if i > 0 and i % int(10*step_for_epoch) == 0:
                current_time_interval = time.time() - last_eval_time
                last_eval_time = time.time()
                print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval)))
                accum_time += current_time_interval
                accum_epoch += 10
                estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch)
                print('estimate finish in: {}'.format(parse_time(estimate_time)))"""

            #print("step \t %d \t adversarial test accuracy \t %g"%(i, accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty})));
            """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=i);"""

        # craft one ensemble of adversarial training samples per step:
        # I-FGSM, MIM, and Madry batches are concatenated into train_images.
        d_eps = random.random();
        y_adv = batch[1]
        adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        """for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})"""
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        # train with benign and adv samples
        pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h});
        train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h});
    duration = time.time() - start_time;
    # print(parse_time(duration)); #print running time duration#

    max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc)
    for atk in attack_switch.keys():
        if attack_switch[atk]:
            max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk])
    logfile.write(max_acc_string + '\n')
    logfile.write(str(duration) + '\n')
예제 #14
0
def eval(sess,
         model_name,
         X_train,
         Y_train,
         X_test,
         Y_test,
         cnn=False,
         rbf=False):
    """Load a saved Keras model and print its accuracy on legitimate and
    adversarial MNIST test samples.

    The model is loaded from ``models/{model_name}.h5`` (or from
    ``rbfmodels/{model_name}.h5`` with the custom ``RBFLayer`` registered
    when ``rbf=True``).  Three accuracies are printed:

    1. the clean test set,
    2. FGSM adversarial examples (eps=0.3), and
    3. JSMA adversarial examples (theta=1.0, gamma=1).

    NOTE: the function name shadows the ``eval`` builtin; kept as-is for
    caller compatibility.

    Args:
        sess: Active TensorFlow session used for all evaluations.
        model_name: Basename of the saved ``.h5`` model file.
        X_train: Unused; kept for interface compatibility.
        Y_train: Unused; kept for interface compatibility.
        X_test: Test images, flat (N, 784) or image-shaped (N, 28, 28, 1)
            depending on ``cnn``.
        Y_test: One-hot test labels of shape (N, 10).
        cnn: If True, build an image-shaped input placeholder
            (None, 28, 28, 1) instead of a flat (None, 784) one.
        rbf: If True, load the model from ``rbfmodels/`` with the custom
            ``RBFLayer`` supplied to the deserializer.

    Side effects: prints three accuracy figures and closes ``sess`` on
    return, so the session cannot be reused afterwards.
    """
    # load saved model
    print("Load model ... ")
    if rbf:
        # The RBF network contains a custom layer that Keras cannot
        # deserialize unless it is passed in via custom_objects.
        loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                  custom_objects={'RBFLayer': RBFLayer})
    else:
        loaded_model = load_model("models/{}.h5".format(model_name))

    # Input placeholder must match the rank the loaded model expects.
    if cnn:
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 784))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = loaded_model(x)

    # Baseline: accuracy on the untouched test set.
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args={"batch_size": 128})
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM).
    # NOTE(review): this uses the deprecated cleverhans.attacks_tf.fgsm
    # symbolic helper; the supported replacement is
    # FastGradientMethod(KerasModelWrapper(model), sess).generate(x, eps=0.3).
    adv_x = fgsm(x, predictions, eps=0.3)
    # Materialize the adversarial images so the same `predictions` graph
    # (fed through `x`) can score them.
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], batch_size=128)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples.
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test_adv,
                          Y_test,
                          args={"batch_size": 128})
    print('Test accuracy on adversarial test examples: ' + str(accuracy))

    # Craft adversarial examples using Jacobian-based Saliency Map Approach
    # (JSMA) via the supported attacks.py API.  gamma=1 allows every feature
    # to be perturbed (a very strong attack).
    wrap = KerasModelWrapper(loaded_model)
    jsma = SaliencyMapMethod(wrap, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    adv_x = jsma.generate(x, **jsma_params)
    # Stop gradients so evaluation does not backprop through the attack.
    adv_x = tf.stop_gradient(adv_x)
    # Here the attack graph is evaluated lazily inside model_eval: preds_adv
    # maps clean X_test through jsma.generate before classification.
    preds_adv = loaded_model(adv_x)

    # NOTE(review): batch_size differs from the 128 used above; JSMA is
    # slow per-batch, and accuracy should not depend on the batch size.
    accuracy = model_eval(sess,
                          x,
                          y,
                          preds_adv,
                          X_test,
                          Y_test,
                          args={"batch_size": 512})
    print('Test accuracy on adversarial test examples: ' + str(accuracy))

    sess.close()