def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile):
  """Train CIFAR-10 for a number of steps."""
  logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                (fgsm_eps, learning_rate, alpha, total_eps))
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    eps_benign = 1 / (1 + eps2_ratio) * epsilon2
    eps_adv = eps2_ratio / (1 + eps2_ratio) * epsilon2

    # Parameter declaration
    #with tf.variable_scope('conv1') as scope:
    kernel1 = _variable_with_weight_decay(
        'kernel1',
        shape=[4, 4, 3, 128],
        stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
        wd=0.0,
        collect=[AECODER_VARIABLES])
    biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0),
                           collect=[AECODER_VARIABLES])
    shape = kernel1.get_shape().as_list()
    w_t = tf.reshape(kernel1, [-1, shape[-1]])
    w = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2 * Delta2 / (L * sensitivity)  #2*3*(14*14 + 2)*16/(L*sensitivity)

    #with tf.variable_scope('conv2') as scope:
    kernel2 = _variable_with_weight_decay(
        'kernel2',
        shape=[5, 5, 128, 128],
        stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
        wd=0.0,
        collect=[CONV_VARIABLES])
    biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1),
                           collect=[CONV_VARIABLES])

    #with tf.variable_scope('conv3') as scope:
    kernel3 = _variable_with_weight_decay(
        'kernel3',
        shape=[5, 5, 256, 256],
        stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
        wd=0.0,
        collect=[CONV_VARIABLES])
    biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1),
                           collect=[CONV_VARIABLES])

    #with tf.variable_scope('local4') as scope:
    kernel4 = _variable_with_weight_decay(
        'kernel4',
        shape=[int(image_size / 4)**2 * 256, hk],
        stddev=0.04,
        wd=0.004,
        collect=[CONV_VARIABLES])
    biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1),
                           collect=[CONV_VARIABLES])

    #with tf.variable_scope('local5') as scope:
    kernel5 = _variable_with_weight_decay(
        'kernel5', [hk, 10],
        stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
        wd=0.0,
        collect=[CONV_VARIABLES])
    biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1),
                           collect=[CONV_VARIABLES])

    #scale2 = tf.Variable(tf.ones([hk]))
    #beta2 = tf.Variable(tf.zeros([hk]))

    params = [
        kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4,
        kernel5, biases5
    ]
    ########
    # Build a Graph that computes the logits predictions from the
    # inference model.
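# --- Illustrative sketch (not part of the original pipeline) ---
# The block above derives the encoder kernel's sensitivity as the largest
# singular value of the flattened weight matrix and forms
# gamma = 2 * Delta2 / (L * sensitivity). Below is a minimal NumPy restatement
# of that computation; `example_Delta2` echoes the inline comment above
# (2*3*(14*14 + 2)*16) and `example_L` is a hypothetical batch size, used only
# to show the arithmetic.

import numpy as np

def kernel_sensitivity(kernel):
  """Largest singular value of the kernel reshaped to [-1, out_channels], transposed."""
  w = kernel.reshape(-1, kernel.shape[-1]).T
  return np.linalg.svd(w, compute_uv=False).max()

def example_gamma(kernel, example_Delta2=3 * (14 * 14 + 2) * 16, example_L=1800):
  # gamma = 2 * Delta2 / (L * sensitivity), as in the graph above
  return 2.0 * example_Delta2 / (example_L * kernel_sensitivity(kernel))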
FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128]) noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) # Auto-Encoder # Enc_Layer2 = EncLayer(inpt=adv_x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h) Enc_Layer3 = EncLayer(inpt=x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2( xShape=tf.shape(x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h) cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0) ### x_image = x + noise y_conv = inference(x_image, FM_h, params) softmax_y_conv = tf.nn.softmax(y_conv) y_ = tf.placeholder(tf.float32, [None, 10]) adv_x += adv_noise y_adv_conv = inference(adv_x, FM_h, params) adv_y_ = tf.placeholder(tf.float32, [None, 10]) # Calculate loss. Apply Taylor Expansion for the output layer perturbW = perturbFM * params[8] loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha, perturbW) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]); pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) #print(pretrain_var_list) #print(train_var_list) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize( pretrain_adv + pretrain_benign, global_step=global_step, var_list=pretrain_var_list) train_op = cifar10.train(loss, global_step, learning_rate, _var_list=train_var_list) sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) sess.run(kernel1.initializer) dp_epsilon = 1.0 _gamma = sess.run(gamma) _gamma_x = Delta2 / L epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x) print(epsilon2_update / _gamma + epsilon2_update / _gamma_x) print(epsilon2_update) delta_r = fgsm_eps * (image_size**2) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW * (14**2) #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon) dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + ( 2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon) dynamic_eps = tf.placeholder(tf.float32) """y_test = inference(x, FM_h, params) softmax_y = tf.nn.softmax(y_test); c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0) x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])""" attack_switch = { 'fgsm': True, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, image_size=image_size, adv_noise=adv_noise) # define each attack 
method's tensor mu_alpha = tf.placeholder(tf.float32, [1]) attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry #====================== attack ========================= #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build an initialization operation to run below. init = tf.initialize_all_variables() sess.run(init) # Start the queue runners. 
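# --- Illustrative sketch (assumes the same cleverhans version as the code above) ---
# The attack construction above repeats one pattern: check attack_switch,
# instantiate the cleverhans attack against the probability-output wrapper, and
# store the generated adversarial-example tensor under the attack's name. A
# condensed restatement of that pattern, taking the already-built
# `ch_model_probs`, `sess`, input placeholder `x`, epsilon placeholder
# `mu_alpha`, and `fgsm_eps` as inputs:

def build_attack_tensors(ch_model_probs, sess, x, mu_alpha, fgsm_eps, attack_switch):
  from cleverhans.attacks import (FastGradientMethod, BasicIterativeMethod,
                                  MomentumIterativeMethod, MadryEtAl)
  constructors = {
      'fgsm': FastGradientMethod,
      'ifgsm': BasicIterativeMethod,
      'mim': MomentumIterativeMethod,
      'madry': MadryEtAl,
  }
  attack_tensors = {}
  for name, ctor in constructors.items():
    if not attack_switch.get(name, False):
      continue
    attack = ctor(model=ch_model_probs, sess=sess)
    kwargs = dict(eps=mu_alpha, clip_min=-1.0, clip_max=1.0)
    if name != 'fgsm':
      # the iterative attacks above use 3 steps of size fgsm_eps / 3
      kwargs.update(eps_iter=fgsm_eps / 3, nb_iter=3)
    if name == 'mim':
      kwargs['decay_factor'] = 1.0
    attack_tensors[name] = attack.generate(x=x, **kwargs)
  return attack_tensors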
#tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint, sess.graph) # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') T = int(int(math.ceil(D / L)) * epochs + 1) # number of steps step_for_epoch = int(math.ceil(D / L)) #number of steps for one epoch perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128) perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128]) #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32) #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3]) perturbFM_h = np.random.laplace(0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128]) #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32) #_W_adv = np.reshape(_W_adv, [32, 32, 3]) #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128) #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]); test_size = len(cifar10_data.test.images) #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt') #BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L) #generateNoise(image_size, Delta2, eps_benign, L, beta); #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L) Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L) #generateNoise(image_size, Delta2, eps_adv, L, beta); Noise_test = generateIdLMNoise( image_size, 0, epsilon2_update, L) #generateNoise(image_size, 0, 2*epsilon2, test_size, beta); emsemble_L = int(L / 3) preT_epochs = 100 pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1) """logfile.write("pretrain: \n") for step in range(_global_step, _global_step + pre_T): d_eps = random.random()*0.5; batch = cifar10_data.train.next_batch(L); #Get a random batch. adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test}) for iter in range(0, 2): adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test}) #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h}); batch = cifar10_data.train.next_batch(L); sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h}); if step % int(25*step_for_epoch) == 0: cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128) logfile.write("step \t %d \t %g \n"%(step, cost_value)) print(cost_value) print('pre_train finished')""" _global_step = 0 for step in xrange(_global_step, _global_step + T): start_time = time.time() d_eps = random.random() * 0.5 batch = cifar10_data.train.next_batch(emsemble_L) #Get a random batch. 
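# --- Illustrative sketch ---
# perturbFM_h above is a single Laplace draw with scale
# 2 * Delta2 / (epsilon2_update * L), reshaped to the encoder feature-map shape
# [-1, 14, 14, 128]. The same draw as a stand-alone helper (shape and scale are
# exactly the ones visible above; nothing else is assumed):

import numpy as np

def sample_feature_map_noise(Delta2, epsilon2_update, L, h=14, w=14, c=128):
  """Laplace noise injected into the h x w x c encoder feature maps."""
  scale = 2.0 * Delta2 / (epsilon2_update * L)
  noise = np.random.laplace(0.0, scale, h * w * c)
  return np.reshape(noise, [-1, h, w, c])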
y_adv_batch = batch[1] """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test}) for iter in range(0, 2): adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})""" adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) batch = cifar10_data.train.next_batch(emsemble_L) y_adv_batch = np.append(y_adv_batch, batch[1], axis=0) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) batch = cifar10_data.train.next_batch(emsemble_L) y_adv_batch = np.append(y_adv_batch, batch[1], axis=0) adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) adv_images = np.append(np.append(adv_images_ifgsm, adv_images_mim, axis=0), adv_images_madry, axis=0) batch = cifar10_data.train.next_batch(L) #Get a random batch. sess.run(pretrain_step, feed_dict={ x: batch[0], adv_x: adv_images, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h }) _, loss_value = sess.run( [train_op, loss], feed_dict={ x: batch[0], y_: batch[1], adv_x: adv_images, adv_y_: y_adv_batch, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h }) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' # report the result periodically if step % (50 * step_for_epoch) == 0 and step >= (300 * step_for_epoch): '''predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test}) argmax_predictions = np.argmax(softmax_predictions, axis=1) """for n_draws in range(0, 2000): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L) _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);""" for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 2000; """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1)""" final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) acc = np.sum(is_correct)*1.0/test_size robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_utility = np.sum(is_robust)*1.0/test_size log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)''' #===================adv samples===================== log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format( step, total_eps) """adv_images_dict = {} for atk in attack_switch.keys(): if attack_switch[atk]: adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict ={x:cifar10_data.test.images}) print("Done with the generating of Adversarial samples")""" #===================adv samples===================== adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = 
{} test_bach_size = 5000 for atk in attack_switch.keys(): print(atk) if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: test_bach = cifar10_data.test.next_batch( test_bach_size) adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={ x: test_bach[0], adv_noise: Noise_test, mu_alpha: [fgsm_eps] }) print("Done adversarial examples") ### PixelDP Robustness ### predictions_form_argmax = np.zeros( [test_bach_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={ x: adv_images_dict, noise: Noise, FM_h: perturbFM_h }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1000): _BenignLNoise = generateIdLMNoise( image_size, Delta2, epsilon2_update, L) _perturbFM_h = np.random.laplace( 0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]) if n_draws == 500: print("n_draws = 500") for j in range(test_bach_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (_BenignLNoise / 10 + Noise), FM_h: perturbFM_h }) * sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h / 10 + perturbFM_h) }) #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_bach_size): is_correct.append( np.argmax(test_bach[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append( robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum( is_correct) * 1.0 / test_bach_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum( is_robust) * 1.0 / test_bach_size ############################## for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') # Save the model checkpoint periodically. if step % (10 * step_for_epoch) == 0 and (step > _global_step): num_examples_per_step = L examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
def PDP_resnet_test(TIN_data, resnet_params, train_params, test_params, all_params): # dict for encoding layer variables and output layer variables pre_define_vars = {} # list of variables to train train_vars = [] pretrain_vars = [] # put everything into GPU with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = tf.Variable(0, trainable=False) # Parameters Declarification ###################################### # encoding (pretrain) layer variables with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope: kernel1 = tf.get_variable( 'kernel1', shape=[ train_params.enc_kernel_size, train_params.enc_kernel_size, 3, train_params.enc_filters ], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer_conv2d()) biases1 = tf.get_variable('biases1', shape=[train_params.enc_filters], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['kernel1'] = kernel1 pre_define_vars['biases1'] = biases1 train_vars.append(kernel1) train_vars.append(biases1) pretrain_vars.append(kernel1) pretrain_vars.append(biases1) # output layer variables with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope: stdv = 1.0 / math.sqrt(train_params.hk) final_w = tf.get_variable( 'kernel', shape=[train_params.hk, train_params.num_classes], dtype=tf.float32, initializer=tf.random_uniform_initializer(-stdv, stdv)) final_b = tf.get_variable('bias', shape=[train_params.num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['final_w'] = final_w pre_define_vars['final_b'] = final_b train_vars.append(final_w) train_vars.append(final_b) ###################################### # Build a Graph that computes the logits predictions from the inputs ###################################### # input placeholders x_sb = tf.placeholder( tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='x_sb') # input is the bunch of n_batchs x_test = tf.placeholder( tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='x_test') y_sb = tf.placeholder( tf.float32, [None, train_params.num_classes], name='y_sb') # input is the bunch of n_batchs (super batch) y_test = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_test') FM_h = tf.placeholder(tf.float32, [ None, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters ], name='FM_h') # one time noise = tf.placeholder( tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='noise') # one time adv_noise = tf.placeholder( tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='adv_noise') # one time keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob') # single GPU version with tf.device(GPU_NAME[0]): y_logits_test = test_inference(x_sb + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax_test_concat = tf.nn.softmax(y_logits_test) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') all_vars = tf.global_variables() print_var_list('all vars', all_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') # add selected vars into list # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or for var in tf.global_variables(): if 'resnet_model' in var.name and \ ('conv0' in var.name or 'fc' in var.name or 'res3' in var.name or 'res4' in var.name or 'res1' in var.name or 'res2' in var.name) and \ ('gamma' in var.name or 'beta' in var.name or 'kernel' in var.name or 'bias' in var.name): if var not in train_vars: train_vars.append(var) elif 'enc_layer' in 
var.name and \ ('kernel' in var.name or 'bias' in var.name): if var not in pretrain_vars: pretrain_vars.append(var) if var not in train_vars: train_vars.append(var) elif 'enc_layer' in var.name and \ ('gamma' in var.name or 'beta' in var.name): if var not in pretrain_vars: pretrain_vars.append(var) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') print_var_list('train_vars', train_vars) print_var_list('pretrain_vars', pretrain_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') ###################################### # start a session with memory growth config = tf.ConfigProto(log_device_placement=False) config.gpu_options.allow_growth = True sess = tf.Session(config=config) print("session created") # Create a saver. saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000) # list all checkpoints in ckpt_path checkpoint_path_read = os.path.join(os.getcwd() + test_params.check_point_dir) ckpts = tf.train.get_checkpoint_state(checkpoint_path_read) print(ckpts) # last_checkpoints = tf.train.Saver.last_checkpoints # print(last_checkpoints) # find the ckpt we need to load and load it for ckpt in ckpts.all_model_checkpoint_paths: # print(ckpt) ckpt_step = int(ckpt.split('-')[-1]) if ckpt_step == test_params.step_to_load: saver.restore(sess, ckpt) print('model loaded from {}'.format(ckpt)) epsilon2_update = all_params['epsilon2_update'] dp_mult = all_params['dp_mult'] ####################################### # setup all attacks attack_switch = { 'fgsm': False, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_output_probs, output_layer='probs', adv_noise=adv_noise, keep_prob=keep_prob, pre_define_vars=pre_define_vars, resnet_params=resnet_params, train_params=train_params) attack_tensor_testing_dict = {} mu_alpha = tf.placeholder(tf.float32, [1]) # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) with tf.device(GPU_NAME[0]): if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate( x=x_sb, eps=mu_alpha, eps_iter=mu_alpha / test_params.iter_step_testing, nb_iter=test_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # MomentumIterativeMethod with tf.device(GPU_NAME[0]): if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_testing_dict['mim'] = mim_obj.generate( x=x_sb, eps=mu_alpha, eps_iter=mu_alpha / test_params.iter_step_testing, nb_iter=test_params.iter_step_testing, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) with tf.device(GPU_NAME[0]): if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) attack_tensor_testing_dict['madry'] = madry_obj.generate( x=x_sb, eps=mu_alpha, eps_iter=mu_alpha / test_params.iter_step_testing, nb_iter=test_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) ##################################### # init noise perturbH_test = all_params['perturbH_test'] perturbFM_h = all_params['perturbFM_h'] Noise = all_params['Noise'] Noise_test = all_params['Noise_test'] #################################### print('start testing') log_file_path = os.getcwd() + test_params.log_file_path log_file = 
open(log_file_path, 'a', encoding='utf-8') for fgsm_eps in test_params.fgsm_eps_list: adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} attacks_and_benign = test_params.attacks + ['benign'] log_str = '' eps_start_time = time.time() # cover all test data for i in range(test_params.test_epochs): test_batch = TIN_data.test.next_batch( test_params.test_batch_size) adv_images_dict = {} # test for each attack for atk in attacks_and_benign: start_time = time.time() if atk not in adv_acc_dict: adv_acc_dict[atk] = 0.0 robust_adv_acc_dict[atk] = 0.0 robust_adv_utility_dict[atk] = 0.0 if atk == 'benign': testing_img = test_batch[0] elif attack_switch[atk]: # if only one gpu available, generate adv samples in-place if atk not in adv_images_dict: adv_images_dict[atk] = sess.run( attack_tensor_testing_dict[atk], feed_dict={ x_sb: test_batch[0], adv_noise: Noise_test, mu_alpha: [fgsm_eps], keep_prob: 1.0 }) testing_img = adv_images_dict[atk] else: continue print('adv gen time: {}s'.format(time.time() - start_time)) start_time = time.time() ### PixelDP Robustness ### predictions_form_argmax = np.zeros([ test_params.test_batch_size, train_params.num_classes ]) softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={ x_sb: testing_img, noise: Noise, FM_h: perturbFM_h, keep_prob: 1.0 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(1, test_params.num_samples + 1): if n_draws % 100 == 0: print( 'current draws: {}, avg draw time: {}s'.format( n_draws, (time.time() - start_time) / n_draws)) _BenignLNoise = generateIdLMNoise( train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size) _perturbFM_h = np.random.laplace( 0.0, 2 * train_params.Delta2 / (epsilon2_update * train_params.effective_batch_size), train_params.enc_h_size * train_params.enc_h_size * train_params.enc_filters) _perturbFM_h = np.reshape(_perturbFM_h, [ -1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters ]) for j in range(test_params.test_batch_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: (_BenignLNoise/10 + Noise), FM_h: perturbFM_h, keep_prob: 1.0}) * \ sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: (_perturbFM_h/10 + perturbFM_h), keep_prob: 1.0}) #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_params.test_batch_size): is_correct.append( np.argmax(test_batch[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=train_params.dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] += np.sum( is_correct) * 1.0 / test_params.test_batch_size robust_adv_acc_dict[atk] += np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] += np.sum( is_robust) * 1.0 / test_params.test_batch_size ############################## log_str += datetime.now().strftime("%Y-%m-%d_%H:%M:%S\n") log_str += 'model 
trained epoch: {}\n'.format(test_params.epoch_to_test)
      log_str += 'fgsm_eps: {}\n'.format(fgsm_eps)
      log_str += 'iter_step_testing: {}\n'.format(test_params.iter_step_testing)
      log_str += 'num_samples: {}\n'.format(test_params.num_samples)
      for atk in attacks_and_benign:
        adv_acc_dict[atk] = adv_acc_dict[atk] / test_params.test_epochs
        robust_adv_acc_dict[atk] = robust_adv_acc_dict[atk] / test_params.test_epochs
        robust_adv_utility_dict[atk] = robust_adv_utility_dict[atk] / test_params.test_epochs
        # added robust prediction
        log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
            atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
            robust_adv_utility_dict[atk],
            robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
      print(log_str, flush=True)
      dt = time.time() - eps_start_time
      print('total test time: {}s'.format(dt), flush=True)
      print('*******************', flush=True)
      log_file.write(log_str)
      log_file.write('*******************\n')
      log_file.flush()
    dt = time.time() - start_time
    log_file.close()
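# --- Illustrative sketch ---
# The testing loop above accumulates, per attack, plain accuracy, accuracy
# restricted to certified-robust points, and the certified-robust fraction,
# then averages over test_params.test_epochs batches. The per-batch bookkeeping
# in isolation (`certified_sizes` stands in for
# robustness.robustness_size_argmax(...) / dp_mult):

import numpy as np

def summarize_batch(labels_onehot, vote_counts, certified_sizes, attack_eps):
  """Return (accuracy, robust-only accuracy, certified fraction) for one batch."""
  preds = np.argmax(vote_counts, axis=1)
  truth = np.argmax(labels_onehot, axis=1)
  is_correct = preds == truth
  is_robust = np.asarray(certified_sizes) >= attack_eps
  acc = is_correct.mean()
  robust_acc = is_correct[is_robust].mean() if is_robust.any() else 0.0
  robust_utility = is_robust.mean()
  return acc, robust_acc, robust_utility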
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile): logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio)) ############################# ##Hyper-parameter Setting#### ############################# hk = 256; #number of hidden units at the last layer Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer D = 50000; #size of the dataset L = 2499; #batch size image_size = 28; padding = 4; #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units #gen_ratio = 1 epsilon1 = 0.0; #0.175; #epsilon for dpLRP epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer epsilon3 = 0.1*(1); #epsilon for the last hidden layer total_eps = epsilon1 + epsilon2 + epsilon3 print(total_eps) uncert = 0.1; #uncertainty modeling at the output layer infl = 1; #inflation rate in the privacy budget redistribution R_lowerbound = 1e-5; #lower bound of the LRP c = [0, 40, 50, 200] #norm bounds epochs = 200; #number of epochs preT_epochs = 50; #number of epochs T = int(D/L*epochs + 1); #number of steps T pre_T = int(D/L*preT_epochs + 1); step_for_epoch = int(D/L); #number of steps for one epoch broken_ratio = 1 #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10] #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2) #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2) #fgsm_eps = 0.1 rand_alpha = 0.05 ##Robustness## robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2; #### LRPfile = os.getcwd() + '/Relevance_R_0_075.txt'; ############################# mnist = input_data.read_data_sets("MNIST_data/", one_hot = True); ############################# ##Construct the Model######## ############################# #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 for the output layer# #Compute the 1/|L| * Delta3 for the last hidden layer# """eps3_ratio = Delta3_adv/Delta3_benign; eps3_benign = 1/(1+eps3_ratio)*(epsilon3) eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)""" loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L); ### #End Step 4# # Parameters Declarification W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]); b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]); shape = W_conv1.get_shape().as_list() w_t = tf.reshape(W_conv1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2*(14*14 + 2)*25/(L*sensitivity) dp_epsilon=1.0 #0.1 delta_r = fgsm_eps*(image_size**2); #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon) W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]); b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]); W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]); b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]); W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]); b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]); """scale2 = tf.Variable(tf.ones([hk])) beta2 = tf.Variable(tf.zeros([hk])) tf.add_to_collections([CONV_VARIABLES], scale2) 
tf.add_to_collections([CONV_VARIABLES], beta2)""" params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2] ### #Step 5: Create the model# noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]); adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]); keep_prob = tf.placeholder(tf.float32); x = tf.placeholder(tf.float32, [None, image_size*image_size]); x_image = tf.reshape(x, [-1,image_size,image_size,1]); #perturbFMx = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28) #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]); # pretrain ### #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise) ########### adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]); adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]); #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28) #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]); # pretrain adv ### #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32) #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]); FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]); Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h) Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h) ########### x_image += noise; x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature. adv_image += adv_noise; adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature. 
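# --- Illustrative numeric check (constants copied from the settings above) ---
# With gen_ratio = 1 (a hypothetical choice; the caller passes gen_ratio in),
# the split above gives epsilon2 = 0.1*(1+1) = 0.2, epsilon3 = 0.1,
# epsilon1 = 0.0, so total_eps = 0.3, and the output-layer Laplace scale is
# Delta3 / (epsilon3 * L) = 2*hk / (epsilon3 * L), roughly 2.05 for hk=256, L=2499.

def mnist_budget_example(gen_ratio=1, hk=256, L=2499):
  epsilon1 = 0.0
  epsilon2 = 0.1 * (1 + gen_ratio)
  epsilon3 = 0.1
  total_eps = epsilon1 + epsilon2 + epsilon3
  scale3 = (2 * hk) / (epsilon3 * L)
  return total_eps, scale3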
#perturbFM = np.random.laplace(0.0, scale3_benign, hk) #perturbFM = np.reshape(perturbFM, [hk]); perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10) perturbFM = np.reshape(perturbFM, [hk, 10]); y_conv = inference(x_image, perturbFM, hk, FM_h, params); softmax_y_conv = tf.nn.softmax(y_conv) #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T) #perturbFM = np.random.laplace(0.0, scale3_adv, hk) #perturbFM = np.reshape(perturbFM, [hk]); y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params); #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T) # test model perturbFM_test = np.random.laplace(0.0, 0, hk) perturbFM_test = np.reshape(perturbFM_test, [hk]); x_test = tf.reshape(x, [-1,image_size,image_size,1]); y_test = inference(x_test, perturbFM_test, hk, FM_h, params); #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T) #Define a place holder for the output label# y_ = tf.placeholder(tf.float32, [None, 10]); adv_y_ = tf.placeholder(tf.float32, [None, 10]); #End Step 5# ############################# ############################# ##Define loss and Optimizer## ############################# ''' Computes differentially private sigmoid cross entropy given `logits`. Measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive. For brevity, let `x = logits`, `z = labels`. The logistic loss is z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x))) = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x))) = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)) = (1 - z) * x + log(1 + exp(-x)) = x - x * z + log(1 + exp(-x)) For x < 0, to avoid overflow in exp(-x), we reformulate the above x - x * z + log(1 + exp(-x)) = log(exp(x)) - x * z + log(1 + exp(-x)) = - x * z + log(1 + exp(x)) Hence, to ensure stability and avoid overflow, the implementation uses this equivalent formulation max(x, 0) - x * z + log(1 + exp(-abs(x))) `logits` and `labels` must have the same type and shape. Let denote neg_abs_logits = -abs(y_conv) = -abs(h_fc1 * W_fc2). By Applying Taylor Expansion, we have: Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv))); = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) = F1 + F2 where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2 To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2. Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data. Therefore, F1 should be differentially private. We need to preserve DP in F2, which reads the groundtruth label y_, as follows: By applying Funtional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2): perturbFM = np.random.laplace(0.0, scale3, hk * 10) perturbFM = np.reshape(perturbFM/L, [hk, 10]); where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3); To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow]. 
Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow ''' ### Taylor for benign x zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype) cond = (y_conv >= zeros) relu_logits = array_ops.where(cond, y_conv, zeros) neg_abs_logits = array_ops.where(cond, -y_conv, y_conv) #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits))) Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2) #Taylor_benign = tf.abs(y_conv - y_) ### Taylor for adv_x zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype) cond_adv = (y_adv_conv >= zeros_adv) relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv) neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv) #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits))) Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2) #Taylor_adv = tf.abs(y_adv_conv - adv_y_) ### Adversarial training loss adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv) '''Some time, using learning rate decay can help to stablize training process. However, use this carefully, since it may affect the convergent speed.''' global_step = tf.Variable(0, trainable=False) pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) #print(pretrain_var_list) #print(train_var_list) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list); train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list); sess = tf.InteractiveSession(); # Define the correct prediction and accuracy # This needs to be changed to "Robust Prediction" correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1)); accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32)); ############# # use these to get predictions wrt to robust conditions """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32)) accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask) #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32)))) certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))""" ############# # craft adversarial samples from x for training dynamic_eps = tf.placeholder(tf.float32); emsemble_L = int(L/3) softmax_y = tf.nn.softmax(y_test) #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0) c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]); #====================== attack ========================= #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True} #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True} attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 
'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False} #other possible attacks: # ElasticNetMethod # FastFeatureAdversaries # LBFGS # SaliencyMapMethod # VirtualAdversarialMethod # y_test = logits (before softmax) # softmax_y_test = preds (probs, after softmax) softmax_y_test = tf.nn.softmax(y_test) # create saver saver = tf.train.Saver(tf.all_variables()) sess.run(W_conv1.initializer) _gamma = sess.run(gamma) _gamma_x = Delta2/L epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x) print(epsilon2_update/_gamma + epsilon2_update/_gamma_x) print(epsilon2_update) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW*(14**2) dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon) ############################# iterativeStep = 100 # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(os.getcwd() + './tmp/train') if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path); saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') start_time = time.time(); # adv pretrain model (Auto encoder layer) cost = tf.reduce_sum(Enc_Layer2.cost); logfile.write("pretrain: \n") # define cleverhans abstract models for using cleverhans attacks ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise) ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise) # rand+fgsm # if attack_switch['randfgsm']: # randfgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) # x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0) # x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0)) # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]); attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # Deepfool if attack_switch['deepfool']: print('creating attack tensor of DeepFool') deepfool_obj = DeepFool(model=ch_model_logits, sess=sess) #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, 
clip_min=-1.0, clip_max=1.0) attack_tensor_dict['deepfool'] = x_adv_test_deepfool # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # SPSA # note here the epsilon is the infinity norm instead of precent of perturb # Maybe exclude this method first, since it seems to have some constrain about the data value range if attack_switch['spsa']: print('creating attack tensor of SPSA') spsa_obj = SPSA(model=ch_model_logits, sess=sess) #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2) x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1) attack_tensor_dict['spsa'] = x_adv_test_spsa # CarliniWagnerL2 # confidence=0 is fron their paper # it is said to be slow, maybe exclude first if attack_switch['cwl2']: print('creating attack tensor of CarliniWagnerL2') cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess) #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['cwl2'] = x_adv_test_cwl2 # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry # SpatialTransformationMethod # the params are pretty different from on the paper # so I use default # exclude since there's bug if attack_switch['stm']: print('creating attack tensor of SpatialTransformationMethod') stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess) #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2) x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6) attack_tensor_dict['stm'] = x_adv_test_stm #====================== attack ========================= sess.run(tf.initialize_all_variables()); ##perturb h for training perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) perturbFM_h = np.reshape(perturbFM_h, [-1, 
14, 14, 32]); ##perturb h for testing perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32) perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]); '''for i in range(_global_step, _global_step + pre_T): d_eps = random.random(); batch = mnist.train.next_batch(L); #Get a random batch. adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps}) for iter in range(0, 9): adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps}) """batch = mnist.train.next_batch(emsemble_L) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]}) batch = mnist.train.next_batch(emsemble_L) adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]}) train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)""" batch_2 = mnist.train.next_batch(L); pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h}); if i % int(5*step_for_epoch) == 0: cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32) logfile.write("step \t %d \t %g \n"%(i, cost_value)) print(cost_value) pre_train_finish_time = time.time() print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))''' # train and test model with adv samples max_benign_acc = -1; max_robust_benign_acc = -1 #max_adv_acc = -1; test_size = len(mnist.test.images) AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size); Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L); BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); last_eval_time = -1 accum_time = 0 accum_epoch = 0 max_adv_acc_dict = {} max_robust_adv_acc_dict = {} #max_robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in max_adv_acc_dict: max_adv_acc_dict[atk] = -1 max_robust_adv_acc_dict[atk] = -1 for i in range(_global_step, _global_step + T): # this batch is for generating adv samples batch = mnist.train.next_batch(emsemble_L); #Get a random batch. y_adv_batch = batch[1] #The number of epochs we print out the result. Print out the result every 5 epochs. 
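# --- Illustrative sketch of the ensemble adversarial batch built below ---
# Each training step draws three emsemble_L-sized batches, runs one attack
# (ifgsm / mim / madry) per batch with a random epsilon, and concatenates the
# adversarial images with their matching labels. A compact restatement, where
# `run_attack(name, images)` stands in for the sess.run calls on
# attack_tensor_dict and `next_batch()` for mnist.train.next_batch(emsemble_L):

import numpy as np

def build_ensemble_adv_batch(next_batch, run_attack, attacks=('ifgsm', 'mim', 'madry')):
  adv_images, adv_labels = [], []
  for atk in attacks:
    images, labels = next_batch()
    adv_images.append(run_attack(atk, images))
    adv_labels.append(labels)
  return np.concatenate(adv_images, axis=0), np.concatenate(adv_labels, axis=0)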
if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch): cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32) print(cost_value) if last_eval_time < 0: last_eval_time = time.time() #===================benign samples===================== predictions_form_argmax = np.zeros([test_size, 10]) #test_bach = mnist.test.next_batch(test_size) softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]); for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1; softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult) is_robust.append(robustness_from_argmax >= fgsm_eps) acc = np.sum(is_correct)*1.0/test_size robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_utility = np.sum(is_robust)*1.0/test_size max_benign_acc = max(max_benign_acc, acc) max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility) log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility) #===================adv samples===================== #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps) """adv_images_dict = {} for atk in attack_switch.keys(): if attack_switch[atk]: adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels}) print("Done with the generating of Adversarial samples")""" #===================adv samples===================== adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]}) ### PixelDP Robustness ### predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 2000): if n_draws % 1000 == 0: print(n_draws) _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]); for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1; softmax_predictions = 
softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h}) #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult) is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size ############################## for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk]) max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk]) max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc)) # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc)); # estimate end time """if i > 0 and i % int(10*step_for_epoch) == 0: current_time_interval = time.time() - last_eval_time last_eval_time = time.time() print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval))) accum_time += current_time_interval accum_epoch += 10 estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch) print('estimate finish in: {}'.format(parse_time(estimate_time)))""" #print("step \t %d \t adversarial test accuracy \t %g"%(i, accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty}))); """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt') saver.save(sess, checkpoint_path, global_step=i);""" d_eps = random.random(); y_adv = batch[1] adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) """for iter in range(0, 9): adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})""" batch = mnist.train.next_batch(emsemble_L) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) y_adv = np.append(y_adv, batch[1], axis = 0) batch = mnist.train.next_batch(emsemble_L) adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) y_adv = np.append(y_adv, batch[1], axis = 0) train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0) batch = mnist.train.next_batch(L); #Get a random batch. 
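# --- Illustrative sketch of the voting scheme used in the evaluation above ---
# Each draw re-samples the benign and feature-map Laplace noise, multiplies two
# noisy softmax outputs, takes the argmax as one vote per test point, and
# accumulates the votes before certifying them with robustness_size_argmax. A
# condensed restatement, where `predict_fn()` stands in for the product of the
# two .eval / sess.run calls with freshly drawn noise:

import numpy as np

def monte_carlo_votes(predict_fn, num_classes, n_draws):
  """Accumulate argmax votes over n_draws noisy forward passes."""
  votes = None
  for _ in range(n_draws):
    probs = predict_fn()
    preds = np.argmax(probs, axis=1)
    if votes is None:
      votes = np.zeros((len(preds), num_classes))
    votes[np.arange(len(preds)), preds] += 1
  return votes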
            # train with benign and adv samples
            pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h})
            train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h})
        duration = time.time() - start_time
        # print(parse_time(duration))  # print total running time
        max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc)
        for atk in attack_switch.keys():
            if attack_switch[atk]:
                max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk])
        logfile.write(max_acc_string + '\n')
        logfile.write(str(duration) + '\n')
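# --------------------------------------------------------------------------
# Illustration only (not called by the training code above): a minimal numpy
# sketch of how the evaluation blocks above turn the Monte-Carlo vote counts
# into the three reported quantities -- accuracy, robust accuracy (accuracy
# restricted to certifiably robust inputs), and robust utility (fraction of
# inputs that are certified at all). The per-example certified size is
# assumed to be precomputed, e.g. via robustness.robustness_size_argmax(...)
# divided by dp_mult as in the loops above; all names below are illustrative.
import numpy as np

def summarize_certified_eval(label_counts, true_labels, certified_sizes, attack_size):
    """label_counts: [N, num_classes] votes accumulated over noisy draws.
    true_labels: [N] integer ground-truth labels.
    certified_sizes: [N] certified attack size per example.
    attack_size: attack budget to certify against (e.g. fgsm_eps)."""
    predictions = np.argmax(label_counts, axis=1)
    is_correct = predictions == np.asarray(true_labels)
    is_robust = np.asarray(certified_sizes) >= attack_size
    acc = is_correct.mean()
    robust_utility = is_robust.mean()
    # accuracy measured only on the certified subset; guard the empty case
    robust_acc = is_correct[is_robust].mean() if is_robust.any() else 0.0
    return acc, robust_acc, robust_utility, robust_acc * robust_utility
# --------------------------------------------------------------------------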
def PDP_resnet_with_pretrain_adv(TIN_data, resnet_params, train_params, params_to_save): # dict for encoding layer variables and output layer variables pre_define_vars = {} # list of variables to train train_vars = [] pretrain_vars = [] with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = tf.Variable(0, trainable=False) # Parameters Declarification ###################################### # encoding (pretrain) layer variables with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope: kernel1 = tf.get_variable('kernel1', shape=[train_params.enc_kernel_size, train_params.enc_kernel_size, 3, train_params.enc_filters], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer_conv2d()) biases1 = tf.get_variable('biases1', shape=[train_params.enc_filters], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['kernel1'] = kernel1 pre_define_vars['biases1'] = biases1 train_vars.append(kernel1) train_vars.append(biases1) pretrain_vars.append(kernel1) pretrain_vars.append(biases1) shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2*train_params.Delta2/(train_params.effective_batch_size * sensitivity) print('gamma: {}'.format(gamma)) # output layer variables with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope: stdv = 1.0 / math.sqrt(train_params.hk) final_w = tf.get_variable('kernel', shape=[train_params.hk, train_params.num_classes], dtype=tf.float32, initializer=tf.random_uniform_initializer(-stdv, stdv)) final_b = tf.get_variable('bias', shape=[train_params.num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['final_w'] = final_w pre_define_vars['final_b'] = final_b train_vars.append(final_w) train_vars.append(final_b) ###################################### # Build a Graph that computes the logits predictions from the inputs ###################################### # input placeholders x_sb = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb') # input is the bunch of n_batchs x_sb_adv = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb_adv') x_test = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_test') y_sb = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb') # input is the bunch of n_batchs (super batch) y_sb_adv = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb_adv') y_test = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_test') FM_h = tf.placeholder(tf.float32, [None, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters], name='FM_h') # one time noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='noise') # one time adv_noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='adv_noise') # one time learning_rate = tf.placeholder(tf.float32, shape=(), name='learning_rate') keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob') # list of grads for each GPU tower_pretrain_grads = [] tower_train_grads = [] all_train_loss = [] # optimizers pretrain_opt = tf.train.AdamOptimizer(learning_rate) train_opt = tf.train.AdamOptimizer(learning_rate) # model and loss on one GPU with tf.device('/gpu:{}'.format(GPU_IDX[0])): # setup encoding layer 
training with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope: Enc_Layer2 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(x_sb_adv)[0], Delta=train_params.Delta2, epsilon=train_params.epsilon2, batch_size=None, learning_rate=None, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h) Enc_Layer3 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2(xShape=tf.shape(x_sb)[0], Delta=train_params.Delta2, epsilon=train_params.epsilon2, batch_size=None, learning_rate=None, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h) pretrain_cost = tf.reduce_mean(pretrain_adv + pretrain_benign) print_var('pretrain_cost', pretrain_cost) # use standard loss first y_logits = inference(x_sb + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax = tf.nn.softmax(y_logits) y_logits_adv = inference(x_sb_adv + adv_noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax_adv = tf.nn.softmax(y_logits_adv) # taylor exp # TODO: use noise here perturbW = train_params.perturbFM * final_w # train_loss = TaylorExp_no_noise(y_softmax, y_sb, y_softmax_adv, y_sb_adv, # train_params.effective_batch_size, train_params.alpha) train_loss = TaylorExp(y_softmax, y_sb, y_softmax_adv, y_sb_adv, train_params.effective_batch_size, train_params.alpha, perturbW) print_var('train_loss', train_loss) all_train_loss.append(train_loss) # split testing in each gpu x_sb_tests = tf.split(x_sb, N_ALL_GPUS, axis=0) y_softmax_test_list = [] for gpu in range(N_ALL_GPUS): with tf.device('/gpu:{}'.format(gpu)): # testing graph now in each gpu y_logits_test = test_inference(x_sb_tests[gpu] + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax_test_list.append(tf.nn.softmax(y_logits_test)) y_softmax_test_concat = tf.concat(y_softmax_test_list, axis=0) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') all_vars = tf.global_variables() print_var_list('all vars', all_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') # add selected vars into trainable variable list # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or for var in tf.global_variables(): if 'resnet_model' in var.name and \ ('conv0' in var.name or 'fc' in var.name or 'res3' in var.name or 'res4' in var.name or 'res1' in var.name or 'res2' in var.name) and \ ('gamma' in var.name or 'beta' in var.name or 'kernel' in var.name or 'bias' in var.name): if var not in train_vars: train_vars.append(var) elif 'enc_layer' in var.name and \ ('kernel' in var.name or 'bias' in var.name): if var not in pretrain_vars: pretrain_vars.append(var) if var not in train_vars: train_vars.append(var) elif 'enc_layer' in var.name and \ ('gamma' in var.name or 'beta' in var.name): if var not in pretrain_vars: pretrain_vars.append(var) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') print_var_list('train_vars', train_vars) print_var_list('pretrain_vars', pretrain_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') # op for compute grads on one gpu with tf.device('/gpu:{}'.format(GPU_IDX[0])): # get all update_ops (updates of moving averageand std) for batch normalizations update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) print_op_list('update ops', update_ops) enc_update_ops = [op for op in update_ops if 'enc_layer' in op.name] print_op_list('enc 
layer update ops', enc_update_ops) # when the gradients are computed, update the batch_norm with tf.control_dependencies(enc_update_ops): pretrain_grads = pretrain_opt.compute_gradients(pretrain_cost, var_list=pretrain_vars) print('*********** pretrain_grads ***********') for x in pretrain_grads: print(x) print('**********************') with tf.control_dependencies(update_ops): train_grads = train_opt.compute_gradients(train_loss, var_list=train_vars) print('*********** train_grads ***********') for x in train_grads: print(x) print('**********************') avg_pretrain_grads = pretrain_grads avg_train_grads = train_grads # get averaged loss tensor for pretrain and train ops total_loss = tf.reduce_sum(tf.stack(all_train_loss)) total_pretrain_loss = tf.reduce_mean(pretrain_cost) # prepare to save gradients for large batch pretrain_grads_save = [g for g,v in pretrain_grads] # print('*********** pretrain_grads_save ***********' + str(pretrain_grads_save) + '**********************') train_grads_save = [g for g,v in train_grads] # print('*********** train_grads_save ***********' + str(train_grads_save) + '**********************') pretrain_grads_shapes = [g.shape.as_list() for g in pretrain_grads_save] train_grads_shapes = [g.shape.as_list() for g in train_grads_save] # placeholders for importing saved gradients pretrain_grads_placeholders = [] for g,v in pretrain_grads: pretrain_grads_placeholders.append(tf.placeholder(tf.float32, v.shape)) train_grads_placeholders = [] for g,v in train_grads: train_grads_placeholders.append(tf.placeholder(tf.float32, v.shape)) # construct the (grad, var) list assemble_pretrain_grads = [] for i in range(len(pretrain_vars)): assemble_pretrain_grads.append((pretrain_grads_placeholders[i], pretrain_vars[i])) assemble_train_grads = [] for i in range(len(train_grads)): assemble_train_grads.append((train_grads_placeholders[i], train_vars[i])) # apply the saved gradients pretrain_op = pretrain_opt.apply_gradients(assemble_pretrain_grads, global_step=global_step) train_op = train_opt.apply_gradients(assemble_train_grads, global_step=global_step) ###################################### # Create a saver. 
saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000) # start a session with memory growth config = tf.ConfigProto(log_device_placement=False) config.gpu_options.allow_growth=True sess = tf.Session(config=config) print("session created") # get some initial values sess.run(kernel1.initializer) _gamma = sess.run(gamma) _gamma_x = train_params.Delta2 / train_params.effective_batch_size epsilon2_update = train_params.epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x) delta_r = train_params.fgsm_eps * (train_params.image_size ** 2) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW*(train_params.enc_h_size ** 2) #dp_mult = (train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update)) / (delta_r / train_params.dp_epsilon) + \ # (2 * train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update))/(delta_h / train_params.dp_epsilon) dp_mult = (train_params.Delta2*train_params.dp_epsilon) / (train_params.effective_batch_size*epsilon2_update * (delta_h / 2 + delta_r)) # save some valus for testing params_to_save['epsilon2_update'] = epsilon2_update params_to_save['dp_mult'] = dp_mult ####################################### # ADV attacks ####################################### # split input for attacks x_attacks = tf.split(x_sb, 3, axis=0) # split it into each batch # currently only ifgsm, mim, and madry attacks are available attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False} # wrap the inference ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_output_probs, output_layer='probs', adv_noise=adv_noise, keep_prob=keep_prob, pre_define_vars=pre_define_vars, resnet_params=resnet_params, train_params=train_params) # to save the reference to the attack tensors attack_tensor_training_dict = {} attack_tensor_testing_dict = {} # placeholder for eps parameter mu_alpha = tf.placeholder(tf.float32, [1]) # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # place on specific GPU with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])): print('ifgsm GPU placement') print('/gpu:{}'.format(AUX_GPU_IDX[0])) if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_training_dict['ifgsm'] = ifgsm_obj.generate(x=x_attacks[0], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, clip_min=-1.0, clip_max=1.0) attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # MomentumIterativeMethod # place on specific GPU with tf.device('/gpu:{}'.format(AUX_GPU_IDX[1])): print('mim GPU placement') print('/gpu:{}'.format(AUX_GPU_IDX[1])) if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_training_dict['mim'] = mim_obj.generate(x=x_attacks[1], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_testing_dict['mim'] = mim_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) # MadryEtAl (Projected Grdient with random init, same as 
rand+fgsm) # place on specific GPU with tf.device('/gpu:{}'.format(AUX_GPU_IDX[2])): print('madry GPU placement') print('/gpu:{}'.format(AUX_GPU_IDX[2])) if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) attack_tensor_training_dict['madry'] = madry_obj.generate(x=x_attacks[2], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, clip_min=-1.0, clip_max=1.0) attack_tensor_testing_dict['madry'] = madry_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # combine the tensors adv_tensors_concat = tf.concat([attack_tensor_training_dict[x] for x in train_params.attacks], axis=0) ####################################### # init op print('initialize_all_variables') init = tf.initialize_all_variables() sess.run(init) # load pretrained variables of RESNET if train_params.load_weights: # first we need to load variable name convert table tgt_var_name_dict = {} with open(train_params.weight_table_path, 'r', encoding='utf-8') as inf: lines = inf.readlines() for line in lines: var_names = line.strip().split(' ') if var_names[1] == 'NONE': continue else: tgt_var_name_dict[var_names[0]] = var_names[1] # load variables dict from checkpoint pretrained_var_dict = load_pretrained_vars() # load pre-trained vars using name convert table for var in tf.global_variables(): if var.name in tgt_var_name_dict: # print('var \"{}\" found'.format(var.name)) try: var.load(pretrained_var_dict[tgt_var_name_dict[var.name]], session=sess) print('{} loaded'.format(var.name)) except: print('var {} not loaded since shape changed'.format(var.name)) else: if 'Adam' not in var.name: print('var \"{}\" NOT FOUND'.format(var.name)) else: print('Training model from scratch') ##################################### # init noise and save for testing perturbH_test = np.random.laplace(0.0, 0, train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters) perturbH_test = np.reshape(perturbH_test, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters]) params_to_save['perturbH_test'] = perturbH_test perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters) perturbFM_h = np.reshape(perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters]) params_to_save['perturbFM_h'] = perturbFM_h Noise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size) params_to_save['Noise'] = Noise Noise_test = generateIdLMNoise(train_params.image_size, 0, epsilon2_update, train_params.effective_batch_size) params_to_save['Noise_test'] = Noise_test # save params for testing with open(os.getcwd() + train_params.params_save_path, 'wb') as outf: pickle.dump(params_to_save, outf) print('params saved') #################################### print('start pretrain') start_time = time.time() lr_schedule_list = sorted(train_params.lr_schedule_pretrain.keys()) attacks_and_benign = train_params.attacks + ['benign'] # build zeros numpy arrays for accumulate grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] total_pretrain_loss_value = 0.0 step = 0 # pretrain loop while True: # if enough steps, break if step > train_params.pretrain_steps: break # 
add steps here so not forgot else: step += 1 # manual schedule learning rate current_epoch = step // (train_params.epoch_steps) current_lr = train_params.lr_schedule_pretrain[get_lr(current_epoch, lr_schedule_list)] # benign and adv batch super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) # get pretrain grads pretrain_grads_save_np, _pretain_loss_value = sess.run([pretrain_grads_save, total_pretrain_loss], feed_dict={x_sb: super_batch[0], x_sb_adv: adv_super_batch[0], learning_rate: current_lr, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h}) # accumulate grads for i in range(len(accumu_pretrain_grads)): accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i] # accumulate loss values total_pretrain_loss_value = total_pretrain_loss_value + _pretain_loss_value # use accumulated gradients to update variables if step % train_params.batch_multi == 0 and step > 0: # print('effective batch reached at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps)) # compute the average grads and build the feed dict pretrain_feed_dict = {} for i in range(len(accumu_pretrain_grads)): pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi pretrain_feed_dict[learning_rate] = current_lr # run train ops by feeding the gradients sess.run(pretrain_op, feed_dict=pretrain_feed_dict) # get loss value avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi # reset the average grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] total_pretrain_loss_value = 0.0 # print loss if step % (1*train_params.epoch_steps) == 0 and step >= (1*train_params.epoch_steps): print('pretrain report at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps)) dt = time.time() - start_time avg_epoch_time = dt / (step / train_params.epoch_steps) print('epoch: {:.4f}, avg epoch time: {:.4f}, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True) print('pretrain_loss: {:.6f}'.format(avg_pretrain_loss_value)) #################################### print('start train') start_time = time.time() lr_schedule_list = sorted(train_params.lr_schedule.keys()) # train whole model # build zeros numpy arrays for accumulate grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes] total_pretrain_loss_value = 0.0 total_train_loss_value = 0.0 step = 0 # train loop while True: # if enough steps, break if step > train_params.train_steps: break # add steps here so not forgot else: step += 1 # compute the grads every step # random eps value for trianing d_eps = random.random()*train_params.random_eps_range # manual schedule learning rate current_epoch = step // (train_params.epoch_steps) current_lr = train_params.lr_schedule[get_lr(current_epoch, lr_schedule_list)] # benign and adv batch super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) # create adv samples super_batch_adv_images = sess.run(adv_tensors_concat, feed_dict={x_sb:adv_super_batch[0], keep_prob:1.0, adv_noise: Noise, mu_alpha:[d_eps]}) # get pretrain and train grads pretrain_grads_save_np, _pretain_loss_value = 
sess.run([pretrain_grads_save, total_pretrain_loss], feed_dict={x_sb: super_batch[0], x_sb_adv: super_batch_adv_images, learning_rate: current_lr, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h}) train_grads_save_np, _train_loss_value = sess.run([train_grads_save, total_loss], feed_dict = {x_sb: super_batch[0], y_sb: super_batch[1], x_sb_adv: super_batch_adv_images, y_sb_adv: adv_super_batch[1], keep_prob: train_params.keep_prob, learning_rate: current_lr, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h}) # accumulate grads for i in range(len(accumu_pretrain_grads)): accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i] for i in range(len(accumu_train_grads)): accumu_train_grads[i] = accumu_train_grads[i] + train_grads_save_np[i] # accumulate loss values total_pretrain_loss_value = total_pretrain_loss_value + _pretain_loss_value total_train_loss_value = total_train_loss_value + _train_loss_value # use accumulated gradients to update variables if step % train_params.batch_multi == 0 and step > 0: # compute the average grads and build the feed dict pretrain_feed_dict = {} for i in range(len(accumu_pretrain_grads)): pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi pretrain_feed_dict[learning_rate] = current_lr # pretrain_feed_dict[keep_prob] = 0.5 train_feed_dict = {} for i in range(len(accumu_train_grads)): train_feed_dict[train_grads_placeholders[i]] = accumu_train_grads[i] / train_params.batch_multi train_feed_dict[learning_rate] = current_lr # train_feed_dict[keep_prob] = 0.5 # run train ops sess.run(pretrain_op, feed_dict=pretrain_feed_dict) sess.run(train_op, feed_dict=train_feed_dict) # get loss value avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi avg_train_loss_value = total_train_loss_value / train_params.batch_multi # reset the average grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes] total_pretrain_loss_value = 0.0 total_train_loss_value = 0.0 # print status every epoch if step % int(train_params.epoch_steps) == 0: dt = time.time() - start_time avg_epoch_time = dt / (step / train_params.epoch_steps) print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True) # save model if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_save: print('saving model at epoch {}'.format(step / train_params.epoch_steps)) checkpoint_path = os.path.join(os.getcwd() + train_params.check_point_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) # testing during training if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_test: test_start = time.time() print('train test reported at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps)) dt = time.time() - start_time avg_epoch_time = dt / (step / train_params.epoch_steps) print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True) print('pretrain_loss: {:.6f}, train_loss: {:.6f}'.format(avg_pretrain_loss_value, avg_train_loss_value)) # print('output layer: \n\t{}'.format(output_layer_value)) #===================adv samples===================== adv_acc_dict = {} 
robust_adv_acc_dict = {} robust_adv_utility_dict = {} log_str = '' # cover all test data for i in range(train_params.test_epochs): test_batch = TIN_data.test.next_batch(train_params.test_batch_size) # if more GPUs available, generate testing adv samples at once if N_AUX_GPUS > 1: adv_images_dict = sess.run(attack_tensor_testing_dict, feed_dict ={x_sb: test_batch[0], adv_noise: Noise_test, mu_alpha: [train_params.fgsm_eps], keep_prob: 1.0}) else: adv_images_dict = {} # test for each attack for atk in attacks_and_benign: if atk not in adv_acc_dict: adv_acc_dict[atk] = 0.0 robust_adv_acc_dict[atk] = 0.0 robust_adv_utility_dict[atk] = 0.0 if atk == 'benign': testing_img = test_batch[0] elif attack_switch[atk]: # if only one gpu available, generate adv samples in-place if atk not in adv_images_dict: adv_images_dict[atk] = sess.run(attack_tensor_testing_dict[atk], feed_dict ={x_sb:test_batch[0], adv_noise: Noise_test, mu_alpha:[train_params.fgsm_eps], keep_prob: 1.0}) testing_img = adv_images_dict[atk] else: continue ### PixelDP Robustness ### predictions_form_argmax = np.zeros([train_params.test_batch_size, train_params.num_classes]) softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: perturbFM_h, keep_prob: 1.0}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, train_params.num_samples): _BenignLNoise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size) _perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters) _perturbFM_h = np.reshape(_perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters]) for j in range(train_params.test_batch_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: (_BenignLNoise/10 + Noise), FM_h: perturbFM_h, keep_prob: 1.0}) * \ sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: (_perturbFM_h/10 + perturbFM_h), keep_prob: 1.0}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(train_params.test_batch_size): is_correct.append(np.argmax(test_batch[1][j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=train_params.fgsm_eps, dp_epsilon=train_params.dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= train_params.fgsm_eps) adv_acc_dict[atk] += np.sum(is_correct)*1.0/train_params.test_batch_size robust_adv_acc_dict[atk] += np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_adv_utility_dict[atk] += np.sum(is_robust)*1.0/train_params.test_batch_size ############################## # average all acc for whole test data for atk in attacks_and_benign: adv_acc_dict[atk] = adv_acc_dict[atk] / train_params.test_epochs robust_adv_acc_dict[atk] = robust_adv_acc_dict[atk] / train_params.test_epochs robust_adv_utility_dict[atk] = robust_adv_utility_dict[atk] / train_params.test_epochs # added robust prediction log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], 
robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
          dt = time.time() - test_start
          print('testing time: {}'.format(dt))
          print(log_str, flush=True)
          print('*******************')
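# --------------------------------------------------------------------------
# Illustration only (not wired into the graph above): a minimal TF1-style
# sketch of the gradient-accumulation pattern used in PDP_resnet_with_pretrain_adv:
# per-micro-batch gradients are fetched as numpy arrays, averaged in Python
# over batch_multi steps, and fed back through placeholders into
# apply_gradients() so that a single update sees the large effective batch.
# Assumptions: gradients are dense tensors with fully defined shapes, and all
# variables (including optimizer slot variables created by apply_gradients)
# are initialized before run_accumulated_step is called. Function and
# argument names are illustrative, not part of the code above.
import numpy as np
import tensorflow as tf

def build_accumulation_ops(opt, loss, var_list):
    # graph-construction half: grad fetch tensors, feed placeholders, apply op
    grads_and_vars = opt.compute_gradients(loss, var_list=var_list)
    grad_tensors = [g for g, _ in grads_and_vars]
    grad_phs = [tf.placeholder(tf.float32, v.shape) for _, v in grads_and_vars]
    apply_op = opt.apply_gradients([(ph, v) for ph, (_, v) in zip(grad_phs, grads_and_vars)])
    return grad_tensors, grad_phs, apply_op

def run_accumulated_step(sess, grad_tensors, grad_phs, apply_op, micro_feed_dicts):
    # run-time half: sum micro-batch grads, then apply their average once
    accum = [np.zeros(g.shape.as_list(), dtype=np.float32) for g in grad_tensors]
    for fd in micro_feed_dicts:
        grad_values = sess.run(grad_tensors, feed_dict=fd)
        accum = [a + g for a, g in zip(accum, grad_values)]
    sess.run(apply_op, feed_dict={ph: a / len(micro_feed_dicts)
                                  for ph, a in zip(grad_phs, accum)})
# --------------------------------------------------------------------------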
def test(cifar10_data, checkpoint_path, epochs, L, learning_rate, scale3, Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, parameter_dict, testing_step): # logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n"%(fgsm_eps, learning_rate, alpha, total_eps)) """Train CIFAR-10 for a number of steps.""" # make sure variables are placed on cpu # TODO: for AWS version, check if put variables on GPU will be better with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) attacks = ['ifgsm', 'mim', 'madry'] # manually create all scopes with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope: scope_conv1 = scope with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope: scope_conv2 = scope with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope: scope_conv3 = scope with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope: scope_local4 = scope with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope: scope_local5 = scope # Parameters Declarification #with tf.variable_scope('conv1') as scope: # with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])): with tf.variable_scope(scope_conv1) as scope: kernel1 = _variable_with_weight_decay( 'kernel1', shape=[4, 4, 3, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[AECODER_VARIABLES]) biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0), collect=[AECODER_VARIABLES]) # shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2 * Delta2 / (L * sensitivity) with tf.variable_scope(scope_conv2) as scope: kernel2 = _variable_with_weight_decay( 'kernel2', shape=[5, 5, 128, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) with tf.variable_scope(scope_conv3) as scope: kernel3 = _variable_with_weight_decay( 'kernel3', shape=[5, 5, 256, 256], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) with tf.variable_scope(scope_local4) as scope: kernel4 = _variable_with_weight_decay( 'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004, collect=[CONV_VARIABLES]) biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) with tf.variable_scope(scope_local5) as scope: kernel5 = _variable_with_weight_decay( 'kernel5', [hk, 10], stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) # group these for use as parameters params = [ kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5 ] scopes = [ scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5 ] # placeholders for input values FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128]) # one time noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) # one time adv_noise = tf.placeholder( tf.float32, [None, image_size, image_size, 3]) # one time x = tf.placeholder(tf.float32, [None, image_size, image_size, 3 ]) # input is the bunch of n_batchs y = tf.placeholder(tf.float32, [None, 10]) # input is the bunch of n_batchs # benign conv output 
bi = 0 x_image = x + noise # with tf.device('/gpu:0'): y_conv = inference(x_image, FM_h, params, scopes, training=True, bn_index=bi) softmax_y_conv = tf.nn.softmax(y_conv) # start a session with memory growth config = tf.ConfigProto(log_device_placement=False) config.gpu_options.allow_growth = True sess = tf.Session(config=config) print("session created") dp_epsilon = 1.0 epsilon2_update = parameter_dict['epsilon2_update'] delta_r = parameter_dict['delta_r'] _sensitivityW = parameter_dict['_sensitivityW'] delta_h = parameter_dict['delta_h'] dp_mult = parameter_dict['dp_mult'] # ============== attacks ================ iter_step_training = parameter_dict['iter_step_training'] ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, scopes=scopes, image_size=image_size, adv_noise=adv_noise) attack_tensor_dict = {} # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]) # build each attack for atk in attacks: print('building attack {} tensors'.format(atk)) # for each gpu assign to each attack if atk == 'ifgsm': ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_dict[atk] = ifgsm_obj.generate( x=x, eps=mu_alpha, eps_iter=mu_alpha / testing_step, nb_iter=testing_step, clip_min=-1.0, clip_max=1.0) elif atk == 'mim': mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_dict[atk] = mim_obj.generate( x=x, eps=mu_alpha, eps_iter=mu_alpha / testing_step, nb_iter=testing_step, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) elif atk == 'madry': madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) attack_tensor_dict[atk] = madry_obj.generate( x=x, eps=mu_alpha, eps_iter=mu_alpha / testing_step, nb_iter=testing_step, clip_min=-1.0, clip_max=1.0) # Create a saver and load checkpoint saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000) saver.restore(sess, checkpoint_path) T = int(int(math.ceil(D / L)) * epochs + 1) # number of steps step_for_epoch = parameter_dict[ 'step_for_epoch'] #number of steps for one epoch # load some fixed noise perturbH_test = parameter_dict['perturbH_test'] perturbFM_h = parameter_dict['perturbFM_h'] Noise = parameter_dict['Noise'] Noise_test = parameter_dict['Noise_test'] # test on testing dataset adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} test_batch_size = 5000 n_draw = 1000 begin_time = time.time() print('on testing set') print('test_batch_size: {}'.format(test_batch_size)) print('testing iteration: {}'.format(testing_step)) print('testing n_draw: {}'.format(n_draw)) atk_index = -1 for _ in [0, 1]: for atk in attacks: print(atk) if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 # generate test samples test_batch = cifar10_data.test.next_batch(test_batch_size) adv_images = sess.run(attack_tensor_dict[atk], feed_dict={ x: test_batch[0], adv_noise: Noise_test, mu_alpha: [fgsm_eps] }) print("Done adversarial examples") ### PixelDP Robustness ### predictions_form_argmax = np.zeros([test_batch_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={ x: adv_images, noise: Noise, FM_h: perturbFM_h }) argmax_predictions = np.argmax(softmax_predictions, axis=1) argmax_labels = np.argmax(test_batch[1], axis=1) print('labels') print(argmax_labels[0:100]) print('init predictions') print(argmax_predictions[0:100]) for _n_draws in range(0, n_draw): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L) 
_perturbFM_h = np.random.laplace( 0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]) if _n_draws == 500 or _n_draws == 1000: print("n_draws = 500/1000") print('time passed: {}s'.format(time.time() - begin_time)) for j in range(test_batch_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( softmax_y_conv, feed_dict={ x: adv_images, noise: (_BenignLNoise / 10 + Noise), FM_h: perturbFM_h }) * sess.run(softmax_y_conv, feed_dict={ x: adv_images, noise: Noise, FM_h: (_perturbFM_h / 10 + perturbFM_h) }) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax print('final predictions') print(np.argmax(final_predictions, axis=1)[0:100]) is_correct = [] is_robust = [] for j in range(test_batch_size): is_correct.append( np.argmax(test_batch[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_batch_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum( is_robust) * 1.0 / test_batch_size ############################## log_str = 'testing, eps: {}; steps: {};'.format( fgsm_eps, testing_step) for atk in attacks: log_str += "\n{}: {:.4f} {:.4f} {:.4f} {:.4f} ".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str, flush=True) tf.reset_default_graph()
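# --------------------------------------------------------------------------
# Illustration only: a minimal numpy sketch of the Laplace feature-map noise
# that the evaluation loops above redraw on every Monte-Carlo sample. The
# scale 2*Delta2/(epsilon * L) matches the Laplace mechanism used throughout
# this file (noise scale = sensitivity / epsilon with sensitivity 2*Delta2/L);
# h_size and n_filters default to the 14x14x128 feature-map shape used in
# test(). Names are illustrative only.
import numpy as np

def sample_feature_map_noise(Delta2, epsilon, L, h_size=14, n_filters=128):
    scale = 2.0 * Delta2 / (epsilon * L)
    noise = np.random.laplace(0.0, scale, h_size * h_size * n_filters)
    return np.reshape(noise, [-1, h_size, h_size, n_filters])
# --------------------------------------------------------------------------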