def BT_Layer2_add(Input, num_layer):
    d_in = [36, 36]
    d_out = [16, 12]
    Tuk_r = 3
    W1 = cifar10._variable_with_weight_decay(
        'W1%s' % num_layer, shape=[d_in[0], Tuk_r, d_out[0]], stddev=0.4, wd=0.04)
    W2 = cifar10._variable_with_weight_decay(
        'W2%s' % num_layer, shape=[d_in[1], Tuk_r, d_out[1]], stddev=0.4, wd=0.04)
    W_core = cifar10._variable_with_weight_decay(
        'W_core%s' % num_layer, shape=[Tuk_r, Tuk_r], stddev=0.4, wd=0.04)
    # W1 = tf.Variable(tf.truncated_normal([d_in[0], Tuk_r, d_out[0]], stddev=0.22))
    # W2 = tf.Variable(tf.truncated_normal([d_in[1], Tuk_r, d_out[1]], stddev=0.22))
    # W_core = tf.Variable(tf.truncated_normal([Tuk_r, Tuk_r], stddev=0.22))
    op = 'abc,be->aec'
    W1core = tf.reshape(tf.einsum(op, W1, W_core), [d_in[0], Tuk_r, d_out[0]])
    op = 'abc,dae,ebf->dcf'
    return tf.reshape(tf.einsum(op, W1core, Input, W2), [-1, d_out[0] * d_out[1]])
def BT_Layer3(Input, num_layer):
    d_in = [12, 12, 16]
    d_out = [12, 12, 9]
    Tuk_r = 3
    W1 = cifar10._variable_with_weight_decay(
        'W1%s' % num_layer, shape=[d_in[0], Tuk_r, d_out[0]], stddev=0.4, wd=0.04)
    W3 = cifar10._variable_with_weight_decay(
        'W3%s' % num_layer, shape=[d_in[1], Tuk_r, d_out[1]], stddev=0.4, wd=0.04)
    W4 = cifar10._variable_with_weight_decay(
        'W4%s' % num_layer, shape=[d_in[2], Tuk_r, d_out[2]], stddev=0.4, wd=0.04)
    W_core = cifar10._variable_with_weight_decay(
        'W_core%s' % num_layer, shape=[Tuk_r, Tuk_r**2], stddev=0.4, wd=0.04)
    # W1 = tf.Variable(tf.truncated_normal([d_in[0], Tuk_r, d_out[0]], stddev=0.3))
    # W2 = tf.Variable(tf.truncated_normal([d_in[1], Tuk_r, d_out[1]], stddev=0.3))
    # W3 = tf.Variable(tf.truncated_normal([d_in[2], Tuk_r, d_out[2]], stddev=0.3))
    # W4 = tf.Variable(tf.truncated_normal([d_in[3], Tuk_r, d_out[3]], stddev=0.3))
    # W_core = tf.Variable(tf.truncated_normal([Tuk_r, Tuk_r, Tuk_r ** 2], stddev=0.3))
    op = 'abc,be->aec'
    W1core = tf.reshape(tf.einsum(op, W1, W_core), [d_in[0], Tuk_r**2, d_out[0]])
    op = 'abc,def->adbecf'
    W34 = tf.reshape(tf.einsum(op, W3, W4),
                     [d_in[1] * d_in[2], Tuk_r**2, d_out[1] * d_out[2]])
    op = 'abc,dae,ebf->dcf'
    return tf.reshape(tf.einsum(op, W1core, Input, W34),
                      [-1, d_out[0] * d_out[1] * d_out[2]])
def BT_Layer4(Input, num_layer, stddev=0.4):
    print('using BT_layer4...')
    d_in = [6, 6, 6, 6]
    d_out = [4, 4, 4, 6]
    Tuk_r = 3
    W1 = cifar10._variable_with_weight_decay(
        'W1%s' % num_layer, shape=[d_in[0], Tuk_r, d_out[0]], stddev=stddev, wd=0.04)
    W2 = cifar10._variable_with_weight_decay(
        'W2%s' % num_layer, shape=[d_in[1], Tuk_r, d_out[1]], stddev=stddev, wd=0.04)
    W3 = cifar10._variable_with_weight_decay(
        'W3%s' % num_layer, shape=[d_in[2], Tuk_r, d_out[2]], stddev=stddev, wd=0.04)
    W4 = cifar10._variable_with_weight_decay(
        'W4%s' % num_layer, shape=[d_in[3], Tuk_r, d_out[3]], stddev=stddev, wd=0.04)
    W_core = cifar10._variable_with_weight_decay(
        'W_core%s' % num_layer, shape=[Tuk_r, Tuk_r, Tuk_r**2], stddev=stddev, wd=0.04)
    # W1 = tf.Variable(tf.truncated_normal([d_in[0], Tuk_r, d_out[0]], stddev=stddev))
    # W2 = tf.Variable(tf.truncated_normal([d_in[1], Tuk_r, d_out[1]], stddev=stddev))
    # W3 = tf.Variable(tf.truncated_normal([d_in[2], Tuk_r, d_out[2]], stddev=stddev))
    # W4 = tf.Variable(tf.truncated_normal([d_in[3], Tuk_r, d_out[3]], stddev=stddev))
    # W_core = tf.Variable(tf.truncated_normal([Tuk_r, Tuk_r, Tuk_r ** 2], stddev=stddev))
    op = 'abc,bex,def->adxcf'
    W12 = tf.reshape(tf.einsum(op, W1, W_core, W2),
                     [d_in[0] * d_in[1], Tuk_r**2, d_out[0] * d_out[1]])
    op = 'abc,def->adbecf'
    W34 = tf.reshape(tf.einsum(op, W3, W4),
                     [d_in[2] * d_in[3], Tuk_r**2, d_out[2] * d_out[3]])
    op = 'abc,dae,ebf->dcf'
    return tf.reshape(tf.einsum(op, W12, Input, W34),
                      [-1, d_out[0] * d_out[1] * d_out[2] * d_out[3]])
def BT_Layer6(Input, num_layer):
    d_in = [4, 3, 3, 4, 4, 4]
    d_out = [4, 4, 3, 3, 3, 3]
    d_in_half1, d_in_half2 = d_in[0] * d_in[1] * d_in[2], d_in[3] * d_in[4] * d_in[5]
    d_out_half1, d_out_half2 = d_out[0] * d_out[1] * d_out[2], d_out[3] * d_out[4] * d_out[5]
    Tuk_r = 1
    W1 = cifar10._variable_with_weight_decay(
        'W1%s' % num_layer, shape=[d_in[0], Tuk_r, d_out[0]], stddev=0.4, wd=0.04)
    W2 = cifar10._variable_with_weight_decay(
        'W2%s' % num_layer, shape=[d_in[1], Tuk_r, d_out[1]], stddev=0.4, wd=0.04)
    W3 = cifar10._variable_with_weight_decay(
        'W3%s' % num_layer, shape=[d_in[2], Tuk_r, d_out[2]], stddev=0.4, wd=0.04)
    W4 = cifar10._variable_with_weight_decay(
        'W4%s' % num_layer, shape=[d_in[3], Tuk_r, d_out[3]], stddev=0.4, wd=0.04)
    W5 = cifar10._variable_with_weight_decay(
        'W5%s' % num_layer, shape=[d_in[4], Tuk_r, d_out[4]], stddev=0.4, wd=0.04)
    W6 = cifar10._variable_with_weight_decay(
        'W6%s' % num_layer, shape=[d_in[5], Tuk_r, d_out[5]], stddev=0.4, wd=0.04)
    W_core = cifar10._variable_with_weight_decay(
        'W_core%s' % num_layer, shape=[Tuk_r, Tuk_r, Tuk_r, Tuk_r**3], stddev=0.4, wd=0.04)
    # W1 = tf.Variable(tf.truncated_normal([d_in[0], Tuk_r, d_out[0]], stddev=0.35))
    # W2 = tf.Variable(tf.truncated_normal([d_in[1], Tuk_r, d_out[1]], stddev=0.35))
    # W3 = tf.Variable(tf.truncated_normal([d_in[2], Tuk_r, d_out[2]], stddev=0.35))
    # W4 = tf.Variable(tf.truncated_normal([d_in[3], Tuk_r, d_out[3]], stddev=0.35))
    # W5 = tf.Variable(tf.truncated_normal([d_in[4], Tuk_r, d_out[4]], stddev=0.35))
    # W6 = tf.Variable(tf.truncated_normal([d_in[5], Tuk_r, d_out[5]], stddev=0.35))
    # W_core = tf.Variable(tf.truncated_normal([Tuk_r, Tuk_r, Tuk_r, Tuk_r ** 3], stddev=0.35))
    op = 'abc,behx,def,ghi->adgxcfi'
    W123 = tf.reshape(tf.einsum(op, W1, W_core, W2, W3),
                      [d_in_half1, Tuk_r**3, d_out_half1])
    op = 'abc,def,ghi->adgbehcfi'
    W456 = tf.reshape(tf.einsum(op, W4, W5, W6),
                      [d_in_half2, Tuk_r**3, d_out_half2])
    op = 'abc,dae,ebf->dcf'
    return tf.reshape(tf.einsum(op, W123, Input, W456), [-1, d_out_half1 * d_out_half2])
def TT_Layer4(Input, num_layer):
    if num_layer == 1:
        d_in = [6, 6, 8, 8]
        d_out = [6, 6, 6, 6]
        TT_r = 8
    if num_layer == 2:
        d_in = [8, 8, 8, 8]
        d_out = [7, 7, 7, 7]
        TT_r = 2
    W1 = cifar10._variable_with_weight_decay(
        'W1', shape=[d_in[0], TT_r, d_out[0]], stddev=0.4, wd=0.04)
    W2 = cifar10._variable_with_weight_decay(
        'W2', shape=[TT_r, d_in[1], TT_r, d_out[1]], stddev=0.4, wd=0.04)
    W3 = cifar10._variable_with_weight_decay(
        'W3', shape=[TT_r, d_in[2], TT_r, d_out[2]], stddev=0.4, wd=0.04)
    W4 = cifar10._variable_with_weight_decay(
        'W4', shape=[d_in[3], TT_r, d_out[3]], stddev=0.4, wd=0.04)
    # W1 = tf.Variable(tf.truncated_normal([d_in[0], Tuk_r, d_out[0]], stddev=0.3))
    # W2 = tf.Variable(tf.truncated_normal([d_in[1], Tuk_r, d_out[1]], stddev=0.3))
    # W3 = tf.Variable(tf.truncated_normal([d_in[2], Tuk_r, d_out[2]], stddev=0.3))
    # W4 = tf.Variable(tf.truncated_normal([d_in[3], Tuk_r, d_out[3]], stddev=0.3))
    # W_core = tf.Variable(tf.truncated_normal([Tuk_r, Tuk_r, Tuk_r ** 2], stddev=0.3))
    op = 'abc,bdef->adecf'
    W12 = tf.reshape(tf.einsum(op, W1, W2),
                     [d_in[0] * d_in[1], TT_r, d_out[0] * d_out[1]])
    op = 'abcd,ecf->beadf'
    W34 = tf.reshape(tf.einsum(op, W3, W4),
                     [d_in[2] * d_in[3], TT_r, d_out[2] * d_out[3]])
    op = 'abc,dae,ebf->dcf'
    return tf.reshape(tf.einsum(op, W12, Input, W34),
                      [-1, d_out[0] * d_out[1] * d_out[2] * d_out[3]])
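# A hedged usage sketch (not part of the original code): each tensor layer above
# consumes a matricized activation whose trailing axes must match its d_in, so a
# flat feature vector is reshaped before the call.  The function and names below
# (bt_dense_replacement, flat_act, bias) are illustrative assumptions only; they
# show how, e.g., BT_Layer2_add could stand in for a 1296-by-192 dense layer.
# Assumes the module-level `import tensorflow as tf` used throughout this file.
def bt_dense_replacement(flat_act, num_layer):
    # flat_act: [batch, 36 * 36] activations from the previous layer
    bt_input = tf.reshape(flat_act, [-1, 36, 36])  # matricize to d_in = [36, 36]
    bt_out = BT_Layer2_add(bt_input, num_layer)    # output shape [-1, 16 * 12]
    bias = tf.Variable(tf.constant(0.1, shape=[16 * 12]))
    return tf.nn.relu(bt_out + bias)               # used like any dense-layer output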
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()
        labels = tf.one_hot(labels, 10)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        #logits = inference(images)
        with tf.variable_scope('conv1') as scope:
            kernel1 = _variable_with_weight_decay(
                'weights', shape=[3, 3, 3, 128], stddev=5e-2, wd=None)
            conv = tf.nn.conv2d(images, kernel1, [1, 2, 2, 1], padding='SAME')
            #conv = tf.nn.dropout(conv, 0.9)
            biases1 = cifar10._variable_on_cpu('biases', [128],
                                               tf.constant_initializer(0.0))
            pre_activation = tf.nn.bias_add(conv, biases1)
            conv1 = tf.nn.relu(pre_activation, name=scope.name)
            cifar10._activation_summary(conv1)
        norm1 = tf.contrib.layers.batch_norm(conv1, scale=True, is_training=True,
                                             updates_collections=None)

        # conv2
        with tf.variable_scope('conv2') as scope:
            kernel2 = _variable_with_weight_decay(
                'weights', shape=[5, 5, 128, 128], stddev=5e-2, wd=None)
            conv = tf.nn.conv2d(norm1, kernel2, [1, 1, 1, 1], padding='SAME')
            biases2 = cifar10._variable_on_cpu('biases', [128],
                                               tf.constant_initializer(0.1))
            pre_activation = tf.nn.bias_add(conv, biases2)
            conv2 = tf.nn.relu(pre_activation, name=scope.name)
            #conv2 = tf.nn.dropout(conv2, 0.9)
            cifar10._activation_summary(conv2)

        # concat conv2 with norm1 to increase the number of features, this step does not affect the privacy preserving guarantee
        current = tf.concat((conv2, norm1), axis=3)
        # norm2
        norm2 = tf.contrib.layers.batch_norm(current, scale=True, is_training=True,
                                             updates_collections=None)

        # conv3
        with tf.variable_scope('conv3') as scope:
            kernel3 = _variable_with_weight_decay(
                'weights', shape=[5, 5, 256, 256], stddev=5e-2, wd=None)
            conv = tf.nn.conv2d(norm2, kernel3, [1, 1, 1, 1], padding='SAME')
            biases3 = cifar10._variable_on_cpu('biases', [256],
                                               tf.constant_initializer(0.1))
            pre_activation = tf.nn.bias_add(conv, biases3)
            conv3 = tf.nn.relu(pre_activation, name=scope.name)
            #conv3 = tf.nn.dropout(conv3, 0.9)
            cifar10._activation_summary(conv3)

        # norm3
        norm3 = tf.contrib.layers.batch_norm(conv3, scale=True, is_training=True,
                                             updates_collections=None)
        #pool3, row_pooling_sequence, col_pooling_sequence = tf.nn.fractional_max_pool(norm3, pooling_ratio=[1.0, 2.0, 2.0, 1.0])
        pool3 = avg_pool(norm3, 2)

        # local4
        with tf.variable_scope('local4') as scope:
            weights1 = cifar10._variable_with_weight_decay(
                'weights', shape=[5 * 5 * 256, hk], stddev=0.04, wd=None)
            biases4 = cifar10._variable_on_cpu('biases', [hk],
                                               tf.constant_initializer(0.1))
            h_pool2_flat = tf.reshape(pool3, [-1, 5 * 5 * 256])
            z2 = tf.add(tf.matmul(h_pool2_flat, weights1), biases4, name=scope.name)
            # Applying normalization for the flat connected layer h_fc1
            batch_mean2, batch_var2 = tf.nn.moments(z2, [0])
            scale2 = tf.Variable(tf.ones([hk]))
            beta2 = tf.Variable(tf.zeros([hk]))
            BN_norm = tf.nn.batch_normalization(z2, batch_mean2, batch_var2,
                                                beta2, scale2, 1e-3)
            ###
            local4 = max_out(BN_norm, hk)
            cifar10._activation_summary(local4)

        # linear layer(WX + b),
        # We don't apply softmax here because
        # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
        # and performs the softmax internally for efficiency.
        weights2 = cifar10._variable_with_weight_decay(
            'weights', [hk, 10], stddev=1 / (hk * 1.0), wd=0.0)
        biases5 = cifar10._variable_on_cpu('biases', [10],
                                           tf.constant_initializer(0.0))
        logits = tf.add(tf.matmul(local4, weights2), biases5, name=scope.name)
        cifar10._activation_summary(logits)

        # Calculate loss.
        # Apply Taylor Expansion for the output layer
        #loss = cifar10.loss(logits, labels)
        # Calculate the average cross entropy loss across the batch.
        # Keep the one-hot labels as floats for softmax_cross_entropy_with_logits.
        labels = tf.cast(labels, tf.float32)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name='cross_entropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='cross_entropy')

        opt = tf.train.GradientDescentOptimizer(lr)

        gw_K1 = tf.gradients(loss, kernel1)[0]
        gb1 = tf.gradients(loss, biases1)[0]
        gw_K2 = tf.gradients(loss, kernel2)[0]
        gb2 = tf.gradients(loss, biases2)[0]
        gw_K3 = tf.gradients(loss, kernel3)[0]
        gb3 = tf.gradients(loss, biases3)[0]
        gw_W1 = tf.gradients(loss, weights1)[0]
        gb4 = tf.gradients(loss, biases4)[0]
        gw_W2 = tf.gradients(loss, weights2)[0]
        gb5 = tf.gradients(loss, biases5)[0]

        # clip gradients
        gw_K1 = tf.clip_by_norm(gw_K1, clip_bound)
        gw_K2 = tf.clip_by_norm(gw_K2, clip_bound)
        gw_K3 = tf.clip_by_norm(gw_K3, clip_bound)
        gw_W1 = tf.clip_by_norm(gw_W1, clip_bound)
        gw_W2 = tf.clip_by_norm(gw_W2, clip_bound)

        # perturb gradients
        gw_K1 += tf.random_normal(shape=tf.shape(gw_K1), mean=0.0,
                                  stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gw_K2 += tf.random_normal(shape=tf.shape(gw_K2), mean=0.0,
                                  stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gw_K3 += tf.random_normal(shape=tf.shape(gw_K3), mean=0.0,
                                  stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gw_W1 += tf.random_normal(shape=tf.shape(gw_W1), mean=0.0,
                                  stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gw_W2 += tf.random_normal(shape=tf.shape(gw_W2), mean=0.0,
                                  stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gb1 += tf.random_normal(shape=tf.shape(gb1), mean=0.0,
                                stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gb2 += tf.random_normal(shape=tf.shape(gb2), mean=0.0,
                                stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gb3 += tf.random_normal(shape=tf.shape(gb3), mean=0.0,
                                stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gb4 += tf.random_normal(shape=tf.shape(gb4), mean=0.0,
                                stddev=sigma * (sensitivity**2), dtype=tf.float32)
        gb5 += tf.random_normal(shape=tf.shape(gb5), mean=0.0,
                                stddev=sigma * (sensitivity**2), dtype=tf.float32)

        # apply gradients and keep tracking moving average of the parameters
        apply_gradient_op = opt.apply_gradients(
            [(gw_K1, kernel1), (gb1, biases1), (gw_K2, kernel2), (gb2, biases2),
             (gw_K3, kernel3), (gb3, biases3), (gw_W1, weights1), (gb4, biases4),
             (gw_W2, weights2), (gb5, biases5)],
            global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                              global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Privacy accountant
        priv_accountant = accountant.GaussianMomentsAccountant(D)
        privacy_accum_op = priv_accountant.accumulate_privacy_spending(
            [None, None], sigma, batch_size)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + '/tmp/cifar10_train',
                                               sess.graph)

        # load the most recent model
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / batch_size)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D / batch_size))  # number of steps for one epoch

        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # report the result periodically
            if step % (5 * step_for_epoch) == 0:
                num_examples_per_step = batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % (5 * step_for_epoch) == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % (5 * step_for_epoch) == 0 and (step > _global_step):
                checkpoint_path = os.path.join(os.getcwd() + '/tmp/cifar10_train',
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                sess.run([privacy_accum_op])
                spent_eps_deltas = priv_accountant.get_privacy_spent(
                    sess, target_eps=target_eps)
                print(step, spent_eps_deltas)

            sess.run([privacy_accum_op])
            spent_eps_deltas = priv_accountant.get_privacy_spent(
                sess, target_eps=target_eps)
            _break = False
            for _eps, _delta in spent_eps_deltas:
                if _delta >= delta:
                    _break = True
                    break
            if _break:
                break
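# Hedged refactoring sketch (not part of the original code): every parameter in
# train() above goes through the same clip-then-perturb pattern, which could be
# factored into one helper.  clip_bound, sigma, and sensitivity are assumed to be
# the module-level hyperparameters referenced above; note that in the original
# code the bias gradients are perturbed but not clipped, hence the clip flag.
def dp_gradient(loss, var, clip_bound, sigma, sensitivity, clip=True):
    """Clip a gradient and add Gaussian-mechanism noise, mirroring train()."""
    g = tf.gradients(loss, var)[0]
    if clip:
        g = tf.clip_by_norm(g, clip_bound)  # bound each gradient's norm
    g += tf.random_normal(shape=tf.shape(g), mean=0.0,
                          stddev=sigma * (sensitivity**2),  # same noise scale as above
                          dtype=tf.float32)
    return g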
def inference(images):
    """Build the CIFAR-10 model.

    Args:
      images: Images returned from distorted_inputs() or inputs().

    Returns:
      Logits.
    """
    ###
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    #
    # conv1
    #xavier = tf.contrib.layers.xavier_initializer_conv2d()
    with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'weights', shape=[3, 3, 3, 128], stddev=5e-2, wd=None)
        conv = tf.nn.conv2d(images, kernel1, [1, 2, 2, 1], padding='SAME')
        #conv = tf.nn.dropout(conv, 0.9)
        biases1 = cifar10._variable_on_cpu('biases', [128],
                                           tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases1)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        cifar10._activation_summary(conv1)
    norm1 = tf.contrib.layers.batch_norm(conv1, scale=True, is_training=True,
                                         updates_collections=None)

    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'weights', shape=[5, 5, 128, 128], stddev=5e-2, wd=None)
        conv = tf.nn.conv2d(norm1, kernel2, [1, 1, 1, 1], padding='SAME')
        biases2 = cifar10._variable_on_cpu('biases', [128],
                                           tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases2)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        #conv2 = tf.nn.dropout(conv2, 0.9)
        cifar10._activation_summary(conv2)

    # concat conv2 with norm1 to increase the number of features, this step does not affect the privacy preserving guarantee
    current = tf.concat((conv2, norm1), axis=3)
    # norm2
    norm2 = tf.contrib.layers.batch_norm(current, scale=True, is_training=True,
                                         updates_collections=None)

    # conv3
    with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'weights', shape=[5, 5, 256, 256], stddev=5e-2, wd=None)
        conv = tf.nn.conv2d(norm2, kernel3, [1, 1, 1, 1], padding='SAME')
        biases3 = cifar10._variable_on_cpu('biases', [256],
                                           tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases3)
        conv3 = tf.nn.relu(pre_activation, name=scope.name)
        #conv3 = tf.nn.dropout(conv3, 0.9)
        cifar10._activation_summary(conv3)

    # norm3
    norm3 = tf.contrib.layers.batch_norm(conv3, scale=True, is_training=True,
                                         updates_collections=None)
    #pool3, row_pooling_sequence, col_pooling_sequence = tf.nn.fractional_max_pool(norm3, pooling_ratio=[1.0, 2.0, 2.0, 1.0])
    pool3 = avg_pool(norm3, 2)

    # local4
    with tf.variable_scope('local4') as scope:
        weights1 = cifar10._variable_with_weight_decay(
            'weights', shape=[5 * 5 * 256, hk], stddev=0.04, wd=None)
        biases4 = cifar10._variable_on_cpu('biases', [hk],
                                           tf.constant_initializer(0.1))
        h_pool2_flat = tf.reshape(pool3, [-1, 5 * 5 * 256])
        z2 = tf.add(tf.matmul(h_pool2_flat, weights1), biases4, name=scope.name)
        # Applying normalization for the flat connected layer h_fc1
        batch_mean2, batch_var2 = tf.nn.moments(z2, [0])
        scale2 = tf.Variable(tf.ones([hk]))
        beta2 = tf.Variable(tf.zeros([hk]))
        BN_norm = tf.nn.batch_normalization(z2, batch_mean2, batch_var2,
                                            beta2, scale2, 1e-3)
        ###
        local4 = max_out(BN_norm, hk)
        cifar10._activation_summary(local4)

    """print(images.get_shape());
    print(norm1.get_shape());
    print(norm2.get_shape());
    print(pool3.get_shape());
    print(local4.get_shape());"""

    # linear layer(WX + b),
    # We don't apply softmax here because
    # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
    # and performs the softmax internally for efficiency.
    weights2 = cifar10._variable_with_weight_decay(
        'weights', [hk, 10], stddev=1 / (hk * 1.0), wd=0.0)
    biases5 = cifar10._variable_on_cpu('biases', [10], tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(local4, weights2), biases5, name=scope.name)
    cifar10._activation_summary(softmax_linear)
    return softmax_linear
def train(cifar10_data, logfile):
    """Train CIFAR-10 for a number of steps."""
    logfile.write("fgsm_eps \t %g, epsilon \t %d \n" % (fgsm_eps, target_eps[0]))
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Parameter declaration
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1', shape=[3, 3, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0)
        biases1 = cifar10._variable_on_cpu('biases1', [128],
                                           tf.constant_initializer(0.0))
        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2', shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0)
        biases2 = cifar10._variable_on_cpu('biases2', [128],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3', shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0)
        biases3 = cifar10._variable_on_cpu('biases3', [256],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local4') as scope:
        kernel4 = cifar10._variable_with_weight_decay(
            'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004)
        biases4 = cifar10._variable_on_cpu('biases4', [hk],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local5') as scope:
        kernel5 = cifar10._variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases5 = cifar10._variable_on_cpu('biases5', [10],
                                           tf.constant_initializer(0.1))
        scale2 = tf.Variable(tf.ones([hk]))
        beta2 = tf.Variable(tf.zeros([hk]))

        params = [kernel1, biases1, kernel2, biases2, kernel3, biases3,
                  kernel4, biases4, kernel5, biases5, scale2, beta2]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivityW = tf.reduce_max(sing_vals)
        dp_delta = 0.05
        #dp_mult = attack_norm_bound * math.sqrt(2 * math.log(1.25 / dp_delta)) / dp_epsilon
        #noise = tf.placeholder(tf.float32, [None, 28, 28, 32])  # leftover 28x28 placeholder; superseded by the 14x14x128 placeholder below
        dp_mult = attack_norm_bound * math.sqrt(2 * math.log(1.25 / dp_delta)) / dp_epsilon
        noise = tf.placeholder(tf.float32, [None, 14, 14, 128])
        sigma = tf.placeholder(tf.float32)

        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        #y_conv, h_conv1 = inference(x, params, dp_mult**2 * noise)
        y_conv, h_conv1 = inference(x, params, attack_norm_bound * noise)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])
        #logits = inference(images)

        # Calculate loss.
        # Apply Taylor Expansion for the output layer
        loss = cifar10.lossDPSGD(y_conv, y_)

        # noise redistribution #
        grad, = tf.gradients(loss, h_conv1)
        normalized_grad = tf.sign(grad)
        normalized_grad = tf.stop_gradient(normalized_grad)
        normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0)))**2
        sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False)
        normalized_grad_r = 14 * 14 * 128 * normalized_grad_r / sum_r
        print(normalized_grad_r)
        shape_grad = normalized_grad_r.get_shape().as_list()
        grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]])
        g = tf.transpose(grad_t)
        sing_g_vals = tf.svd(g, compute_uv=False)
        sensitivity_2 = tf.reduce_max(sing_g_vals)
        ########################

        opt = tf.train.GradientDescentOptimizer(lr)

        gw_K1 = tf.gradients(loss, kernel1)[0]
        gb1 = tf.gradients(loss, biases1)[0]
        gw_K2 = tf.gradients(loss, kernel2)[0]
        gb2 = tf.gradients(loss, biases2)[0]
        gw_K3 = tf.gradients(loss, kernel3)[0]
        gb3 = tf.gradients(loss, biases3)[0]
        gw_K4 = tf.gradients(loss, kernel4)[0]
        gb4 = tf.gradients(loss, biases4)[0]
        gw_K5 = tf.gradients(loss, kernel5)[0]
        gb5 = tf.gradients(loss, biases5)[0]

        # clip gradients
        gw_K1 = tf.clip_by_norm(gw_K1, clip_bound)
        gw_K2 = tf.clip_by_norm(gw_K2, clip_bound)
        gw_K3 = tf.clip_by_norm(gw_K3, clip_bound)
        gw_K4 = tf.clip_by_norm(gw_K4, clip_bound)
        gw_K5 = tf.clip_by_norm(gw_K5, clip_bound)

        # perturb gradients
        gw_K1 += tf.random_normal(shape=tf.shape(gw_K1), mean=0.0,
                                  stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gw_K2 += tf.random_normal(shape=tf.shape(gw_K2), mean=0.0,
                                  stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gw_K3 += tf.random_normal(shape=tf.shape(gw_K3), mean=0.0,
                                  stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gw_K4 += tf.random_normal(shape=tf.shape(gw_K4), mean=0.0,
                                  stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gw_K5 += tf.random_normal(shape=tf.shape(gw_K5), mean=0.0,
                                  stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gb1 += tf.random_normal(shape=tf.shape(gb1), mean=0.0,
                                stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gb2 += tf.random_normal(shape=tf.shape(gb2), mean=0.0,
                                stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gb3 += tf.random_normal(shape=tf.shape(gb3), mean=0.0,
                                stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gb4 += tf.random_normal(shape=tf.shape(gb4), mean=0.0,
                                stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size
        gb5 += tf.random_normal(shape=tf.shape(gb5), mean=0.0,
                                stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size

        # apply gradients and keep tracking moving average of the parameters
        apply_gradient_op = opt.apply_gradients(
            [(gw_K1, kernel1), (gb1, biases1), (gw_K2, kernel2), (gb2, biases2),
             (gw_K3, kernel3), (gb3, biases3), (gw_K4, kernel4), (gb4, biases4),
             (gw_K5, kernel5), (gb5, biases5)],
            global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                              global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #train_op = cifar10.trainDPSGD(loss, global_step, clip_bound, sigma, sensitivity)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs, output_layer='probs',
            params=params, image_size=image_size)

        # define each attack method's tensor
        attack_tensor_dict = {}

        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2)  # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3, nb_iter=3,
                                                  clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 3, nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3, nb_iter=3,
                                                  clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry
        #====================== attack =========================

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Privacy accountant
        priv_accountant = accountant.GaussianMomentsAccountant(D)
        privacy_accum_op = priv_accountant.accumulate_privacy_spending(
            [None, None], sigma, batch_size)

        # Build the summary operation based on the TF collection of Summaries.
        #summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + path, sess.graph)

        # load the most recent model
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / batch_size)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D / batch_size))  # number of steps for one epoch

        s = math.log(sqrt(2.0 / math.pi) * 1e+5)
        sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
        #print(sigmaEGM)
        __noiseE = np.random.normal(0.0, sigmaEGM, 14 * 14 * 128).astype(np.float32)
        __noiseE = np.reshape(__noiseE, [-1, 14, 14, 128])

        print("Compute The Noise Redistribution Vector")
        for step in xrange(_global_step, 100 * step_for_epoch):
            batch = cifar10_data.train.next_batch(batch_size)  # Get a random batch.
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={x: batch[0], y_: batch[1],
                                                noise: __noiseE * 0,
                                                sigma: sigma_value * 0})
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)

        batch = cifar10_data.train.next_batch(40 * batch_size)
        grad_redis = sess.run([normalized_grad_r],
                              feed_dict={x: batch[0], y_: batch[1], noise: __noiseE * 0})
        _sensitivity_2 = sess.run([sensitivity_2],
                                  feed_dict={x: batch[0], y_: batch[1], noise: __noiseE * 0})
        #print(_sensitivity_2)
        _sensitivityW = sess.run(sensitivityW)
        #print(_sensitivityW)
        Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0])
        #print(Delta_redis)
        sigmaHGM = sqrt(2.0) * Delta_redis * (sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
        #print(sigmaHGM)
        __noiseH = np.random.normal(0.0, sigmaHGM, 14 * 14 * 128).astype(np.float32)
        __noiseH = np.reshape(__noiseH, [-1, 14, 14, 128]) * grad_redis

        sess.run(init)
        print("Training")
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            batch = cifar10_data.train.next_batch(batch_size)  # Get a random batch.
            #grad_redis = sess.run([normalized_grad_r], feed_dict = {x: batch[0], y_: batch[1], noise: (__noise + grad_redis)/2})
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={x: batch[0], y_: batch[1],
                                                noise: (__noiseE + __noiseH) / 2,
                                                sigma: sigma_value})
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            sess.run([privacy_accum_op])
            spent_eps_deltas = priv_accountant.get_privacy_spent(
                sess, target_eps=target_eps)
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
                print(spent_eps_deltas)

            _break = False
            for _eps, _delta in spent_eps_deltas:
                if _delta >= delta:
                    _break = True
                    break
            if _break:
                break

        ## Robustness
        print("Testing")
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        test_bach_size = 5000
        for atk in attack_switch.keys():
            if atk not in adv_acc_dict:
                adv_acc_dict[atk] = -1
                robust_adv_acc_dict[atk] = -1
                robust_adv_utility_dict[atk] = -1

            if attack_switch[atk]:
                test_bach = cifar10_data.test.next_batch(test_bach_size)
                adv_images_dict = sess.run(attack_tensor_dict[atk],
                                           feed_dict={x: test_bach[0]})

                ### PixelDP Robustness ###
                predictions_form_argmax = np.zeros([test_bach_size, 10])
                softmax_predictions = sess.run(
                    softmax_y_conv,
                    feed_dict={x: adv_images_dict,
                               noise: (__noiseE + __noiseH) / 2})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
                for n_draws in range(0, 1000):
                    _noiseE = np.random.normal(0.0, sigmaEGM,
                                               14 * 14 * 128).astype(np.float32)
                    _noiseE = np.reshape(_noiseE, [-1, 14, 14, 128])
                    _noise = np.random.normal(0.0, sigmaHGM,
                                              14 * 14 * 128).astype(np.float32)
                    _noise = np.reshape(_noise, [-1, 14, 14, 128]) * grad_redis
                    for j in range(test_bach_size):
                        pred = argmax_predictions[j]
                        predictions_form_argmax[j, pred] += 1
                    softmax_predictions = sess.run(
                        softmax_y_conv,
                        feed_dict={x: adv_images_dict,
                                   noise: (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4})
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                final_predictions = predictions_form_argmax

                is_correct = []
                is_robust = []
                for j in range(test_bach_size):
                    is_correct.append(
                        np.argmax(test_bach[1][j]) == np.argmax(final_predictions[j]))
                    robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                        counts=predictions_form_argmax[j], eta=0.05,
                        dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon,
                        dp_delta=0.05, dp_mechanism='gaussian') / dp_mult
                    is_robust.append(robustness_from_argmax >= fgsm_eps)

                adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_bach_size
                robust_adv_acc_dict[atk] = np.sum(
                    [a and b for a, b in zip(is_robust, is_correct)]) * 1.0 / np.sum(is_robust)
                robust_adv_utility_dict[atk] = np.sum(is_robust) * 1.0 / test_bach_size
                ##############################

        log_str = ""
        for atk in attack_switch.keys():
            if attack_switch[atk]:
                # added robust prediction
                log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
        print(log_str)
        logfile.write(log_str + '\n')
def test_inference(images):
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay(
            'weights', shape=[3, 3, 3, 128], stddev=5e-2, wd=0.0)
        conv = tf.nn.conv2d(images, kernel, [1, 2, 2, 1], padding='SAME')
        #conv = tf.nn.dropout(conv, 1.0)
        biases = cifar10._variable_on_cpu('biases', [128],
                                          tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        cifar10._activation_summary(conv1)

    # norm1
    norm1 = tf.contrib.layers.batch_norm(conv1, scale=True, is_training=True,
                                         updates_collections=None)

    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay(
            'weights', shape=[5, 5, 128, 128], stddev=5e-2, wd=0.0)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = cifar10._variable_on_cpu('biases', [128],
                                          tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        #conv2 = tf.nn.dropout(conv2, 1.0)
        cifar10._activation_summary(conv2)

    # concat conv2 with norm1 to increase the number of features, this step does not affect the privacy preserving guarantee
    current = tf.concat((conv2, norm1), axis=3)
    # norm2
    norm2 = tf.contrib.layers.batch_norm(current, scale=True, is_training=True,
                                         updates_collections=None)

    # conv3
    with tf.variable_scope('conv3') as scope:
        kernel = _variable_with_weight_decay(
            'weights', shape=[5, 5, 256, 256], stddev=5e-2, wd=0.0)
        conv = tf.nn.conv2d(norm2, kernel, [1, 1, 1, 1], padding='SAME')  # noiseless model
        biases = cifar10._variable_on_cpu('biases', [256],
                                          tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(pre_activation, name=scope.name)
        #conv3 = tf.nn.dropout(conv3, 1.0)
        cifar10._activation_summary(conv3)

    # norm3
    norm3 = tf.contrib.layers.batch_norm(conv3, scale=True, is_training=True,
                                         updates_collections=None)
    #pool3, row_pooling_sequence, col_pooling_sequence = tf.nn.fractional_max_pool(norm3, pooling_ratio=[1.0, 2.0, 2.0, 1.0])
    pool3 = avg_pool(norm3, 2)

    # local4, note that we do not need to inject Laplace noise into the testing phase
    with tf.variable_scope('local4') as scope:
        weights = cifar10._variable_with_weight_decay(
            'weights', shape=[5 * 5 * 256, hk], stddev=0.04, wd=0.004)
        biases = cifar10._variable_on_cpu('biases', [hk],
                                          tf.constant_initializer(0.1))
        h_pool2_flat = tf.reshape(pool3, [-1, 5 * 5 * 256])
        z2 = tf.add(tf.matmul(h_pool2_flat, weights), biases, name=scope.name)
        # Applying normalization for the flat connected layer h_fc1
        batch_mean2, batch_var2 = tf.nn.moments(z2, [0])
        scale2 = tf.Variable(tf.ones([hk]))
        beta2 = tf.Variable(tf.zeros([hk]))
        BN_norm = tf.nn.batch_normalization(z2, batch_mean2, batch_var2,
                                            beta2, scale2, 1e-3)
        ###
        local4 = max_out(BN_norm, hk)
        cifar10._activation_summary(local4)

    weights = cifar10._variable_with_weight_decay(
        'weights', [hk, 10], stddev=1 / (hk * 1.0), wd=0.0)
    biases = cifar10._variable_on_cpu('biases', [10], tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
    cifar10._activation_summary(softmax_linear)
    return softmax_linear