def createAttack(model, sess, x, y, X_test, y_test, eps=0.02):
    """Run a targeted PGD (MadryEtAl) attack over X_test in batches."""
    import sys
    import time

    import numpy as np
    from cleverhans.attacks import MadryEtAl

    print("Beginning PGD attack")
    pgd = MadryEtAl(model, back='tf', sess=sess)
    preds = model(x)  # (not used below)
    t0 = time.time()
    batch_size = 64

    # Iterate over X_test in fixed-size batches.
    X_test_adv_pgd = np.zeros(X_test.shape)
    num_batches = X_test.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = batch_size * i
        batch_end = batch_size * (i + 1)
        batch = X_test[batch_start:batch_end]
        if not (i % 20):
            print("attacking batch", i, "from ", batch_start, " to ",
                  batch_end, file=sys.stderr)
        # Flip the one-hot binary labels to get the attack targets.
        attack_target = 1 - y_test[batch_start:batch_end]
        pgd_params = {'eps': eps, 'eps_iter': 0.01, 'clip_min': -1.,
                      'clip_max': 1., 'nb_iter': 20, 'y_target': attack_target}
        X_test_adv_pgd[batch_start:batch_end] = pgd.generate_np(batch,
                                                                **pgd_params)

    # Handle the residual batch when X_test is not a multiple of batch_size.
    if X_test.shape[0] % batch_size:
        batch_start = num_batches * batch_size
        batch_end = X_test.shape[0]
        batch = X_test[batch_start:batch_end].reshape((-1, 224, 224, 3))
        print("attacking residual batch from ", batch_start, " to ",
              batch_end, file=sys.stderr)
        attack_target = 1 - y_test[batch_start:batch_end].reshape((-1, 2))
        pgd_params = {'eps': eps, 'eps_iter': 0.01, 'clip_min': -1.,
                      'clip_max': 1., 'nb_iter': 20, 'y_target': attack_target}
        X_test_adv_pgd[batch_start:batch_end] = pgd.generate_np(batch,
                                                                **pgd_params)

    # Report on timing.
    t1 = time.time()
    total = t1 - t0
    m, s = divmod(total, 60)
    h, m = divmod(m, 60)
    print("Completed attack in %d:%02d:%02d" % (h, m, s))
    return X_test_adv_pgd
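# Hedged usage sketch for createAttack above: it presumes a CleverHans-
# compatible model (e.g. a Keras net behind KerasModelWrapper), a live TF
# session, 224x224x3 inputs scaled to [-1, 1], and two-class one-hot labels,
# matching the clip ranges and reshapes in the function. `keras_model`,
# `X_test`, and `y_test` are assumed to exist and are not defined here.
import numpy as np
import tensorflow as tf
from cleverhans.utils_keras import KerasModelWrapper

sess = tf.Session()
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y = tf.placeholder(tf.float32, [None, 2])
wrapped = KerasModelWrapper(keras_model)  # keras_model: assumed trained net
X_test_adv = createAttack(wrapped, sess, x, y, X_test, y_test, eps=0.02)
np.save('X_test_adv_pgd.npy', X_test_adv)  # illustrative output path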
def setUp(self):
    super(TestMadryEtAl, self).setUp()
    import tensorflow as tf

    # The world's simplest neural network
    def my_model(x):
        W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
        h1 = tf.nn.sigmoid(tf.matmul(x, W1))
        W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
        res = tf.matmul(h1, W2)
        return res

    self.sess = tf.Session()
    self.model = my_model
    self.attack = MadryEtAl(self.model, sess=self.sess)
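# A hedged example of the kind of check this setUp supports; it is not part
# of the original suite. It relies only on generate_np parameters that
# MadryEtAl accepts (eps, eps_iter, nb_iter, clip_min, clip_max).
def test_generate_np_stays_in_eps_ball(self):
    import numpy as np
    x_val = np.random.rand(10, 2).astype(np.float32)
    x_adv = self.attack.generate_np(x_val, eps=0.1, eps_iter=0.02, nb_iter=5,
                                    clip_min=0.0, clip_max=1.0)
    # PGD with an L-inf budget should never move a coordinate more than eps.
    self.assertLessEqual(np.max(np.abs(x_adv - x_val)), 0.1 + 1e-6)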
def load_attack(sess, attack_method, model, targeted, adv_ys, eps, batch_size):
    """Instantiate the requested CleverHans attack plus its parameter dict."""
    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = FastGradientMethod(model_prob, sess=sess)
        attack_params, yname = config_fgsm(targeted, adv_ys, eps, batch_size)
    elif attack_method == 'pgd':
        from cleverhans.attacks import MadryEtAl
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MadryEtAl(model_prob, sess=sess)
        attack_params, yname = config_madry(targeted, adv_ys, eps, batch_size)
    elif attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        attack_params, yname = config_mim(targeted, adv_ys, eps, batch_size)
    elif attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        # Carlini-Wagner works on logits rather than probabilities.
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        attack_params, yname = config_cw(targeted, adv_ys, eps, batch_size)
    else:
        raise ValueError("Unknown attack method: %s" % attack_method)
    return attack, attack_params, yname
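# Illustrative call pattern for load_attack (hedged): `model` is assumed to
# expose predict(x, softmax=...) as the lambdas above require, and `x_batch`
# is an input array; the exact contents of attack_params come from the
# config_* helpers defined elsewhere in the original file.
attack, attack_params, yname = load_attack(sess, 'pgd', model,
                                           targeted=False, adv_ys=None,
                                           eps=8. / 255, batch_size=128)
x_adv_batch = attack.generate_np(x_batch, **attack_params)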
def initialize_attack(model, sess):
    """Wrap a Keras model for CleverHans and build a PGD (MadryEtAl) attack."""
    from cleverhans.attacks import MadryEtAl
    from cleverhans.utils_keras import KerasModelWrapper

    wrap = KerasModelWrapper(model)
    # A FastGradientMethod attack could be swapped in here instead:
    # attack = FastGradientMethod(wrap, sess=sess)
    attack = MadryEtAl(wrap, sess=sess)
    del model
    return attack
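# Sketch of how initialize_attack is typically consumed (assumed workflow,
# not from the original file): wrap a trained Keras model once, then craft
# PGD examples with explicit parameters at generation time. `keras_model`,
# `sess`, and `x_batch` are assumptions.
attack = initialize_attack(keras_model, sess)
x_adv = attack.generate_np(x_batch, eps=0.03, eps_iter=0.01, nb_iter=10,
                           clip_min=0., clip_max=1.)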
def attack_classifier(sess, x, model, x_test, attack_method="fgsm",
                      target=None, batch_size=128):
    """Generate adversarial examples for x_test with a CleverHans attack."""

    def target_batch():
        # One-hot target label, repeated across the batch.
        return tf.constant(
            np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8 / 255,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = target_batch()
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf}
        if target is not None:
            params["y_target"] = target_batch()
        method = BasicIterativeMethod(model, sess=sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps': 8 / 255,
                  'eps_iter': 1 / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = target_batch()
        method = MomentumIterativeMethod(model, sess=sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta': 8 / 255,
                  'gamma': 0.1,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None  # used untargeted here
        method = SaliencyMapMethod(model, sess=sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps': 8 / 255,
                  'num_iterations': 10,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = VirtualAdversarialMethod(model, sess=sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {"confidence": 0,
                  "batch_size": 128,
                  "learning_rate": 1e-4,
                  "binary_search_steps": 10,
                  "max_iterations": 1000,
                  "abort_early": True,
                  "initial_const": 1e-2,
                  "clip_min": 0,
                  "clip_max": 1}
        if target is not None:
            params["y_target"] = target_batch()
        method = CarliniWagnerL2(model, sess=sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {"fista": "FISTA",
                  "beta": 0.1,
                  "decision_rule": "EN",
                  "confidence": 0,
                  "batch_size": 128,
                  "learning_rate": 1e-4,
                  "binary_search_steps": 10,
                  "max_iterations": 1000,
                  "abort_early": True,
                  "initial_const": 1e-2,
                  "clip_min": 0,
                  "clip_max": 1}
        if target is not None:
            params["y_target"] = target_batch()
        method = ElasticNetMethod(model, sess=sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {"nb_candidate": 10,
                  "overshoot": 1e-3,
                  "max_iter": 100,
                  "nb_classes": 10,
                  "clip_min": 0,
                  "clip_max": 1}
        assert target is None  # DeepFool is untargeted
        method = DeepFool(model, sess=sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {'batch_size': 128,
                  "binary_search_steps": 10,
                  "max_iterations": 1000,
                  "initial_const": 1e-2,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is not None  # LBFGS is a targeted attack
        params["y_target"] = target_batch()
        method = LBFGS(model, sess=sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8 / 255,
                  'eps_iter': 1 / 255,
                  'nb_iter': 10,
                  'ord': np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = target_batch()
        method = MadryEtAl(model, sess=sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {'epsilon': 1 / 255,
                  'num_steps': 10,
                  'is_targeted': False,
                  'early_stop_loss_threshold': None,
                  'learning_rate': 0.01,
                  'delta': 0.01,
                  'batch_size': 128,
                  'spsa_iters': 1,
                  'is_debug': False}
        if target is not None:
            params["y_target"] = target_batch()
            params["is_targeted"] = True
        method = SPSA(model, sess=sess)

    else:
        raise ValueError("Can not recognize this attack method: %s"
                         % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    # Note: samples in a trailing partial batch are dropped.
    for i in range(num_batch):
        x_feed = x_test[i * batch_size:(i + 1) * batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]
        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)
    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
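# Example invocation (hedged): craft untargeted Madry/PGD examples for a
# CIFAR-10-shaped test set. `sess`, `x`, `model`, and `x_test` must already
# exist, with `x` being the placeholder the model graph was built on.
adv_imgs = attack_classifier(sess, x, model, x_test, attack_method="madry",
                             target=None, batch_size=128)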
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile):
    """Train CIFAR-10 for a number of steps."""
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        eps_benign = 1 / (1 + eps2_ratio) * (epsilon2)
        eps_adv = eps2_ratio / (1 + eps2_ratio) * (epsilon2)

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1', shape=[4, 4, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0, collect=[AECODER_VARIABLES])
        biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0),
                               collect=[AECODER_VARIABLES])

        # Layer sensitivity from the largest singular value of kernel1.
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)  # 2*3*(14*14 + 2)*16/(L*sensitivity)

        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2', shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0, collect=[CONV_VARIABLES])
        biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3', shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0, collect=[CONV_VARIABLES])
        biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #with tf.variable_scope('local4') as scope:
        kernel4 = _variable_with_weight_decay(
            'kernel4', shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04, wd=0.004, collect=[CONV_VARIABLES])
        biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #with tf.variable_scope('local5') as scope:
        kernel5 = _variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
            math.ceil(5 / 2),
            wd=0.0, collect=[CONV_VARIABLES])
        biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #scale2 = tf.Variable(tf.ones([hk]))
        #beta2 = tf.Variable(tf.zeros([hk]))

        params = [kernel1, biases1, kernel2, biases2, kernel3, biases3,
                  kernel4, biases4, kernel5, biases5]

        ########
        # Build a Graph that computes the logits predictions from the
        # inference model.
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])
        noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_noise = tf.placeholder(tf.float32,
                                   [None, image_size, image_size, 3])

        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        # Auto-Encoder pretraining ops for adversarial and benign inputs.
        Enc_Layer2 = EncLayer(inpt=adv_x, n_filter_in=3, n_filter_out=128,
                              filter_size=3, W=kernel1, b=biases1,
                              activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(
            xShape=tf.shape(adv_x)[0], Delta=Delta2, epsilon=epsilon2,
            batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1,
            perturbFMx=adv_noise, perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x, n_filter_in=3, n_filter_out=128,
                              filter_size=3, W=kernel1, b=biases1,
                              activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(
            xShape=tf.shape(x)[0], Delta=Delta2, epsilon=epsilon2,
            batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1,
            perturbFMx=noise, perturbFM_h=FM_h)
        cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0)
        ###

        x_image = x + noise
        y_conv = inference(x_image, FM_h, params)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        adv_x += adv_noise
        y_adv_conv = inference(adv_x, FM_h, params)
        adv_y_ = tf.placeholder(tf.float32, [None, 10])

        # Calculate loss. Apply Taylor Expansion for the output layer.
        perturbW = perturbFM * params[8]
        loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha,
                                 perturbW)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]);
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        train_var_list = tf.get_collection(CONV_VARIABLES)
        #print(pretrain_var_list)
        #print(train_var_list)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize(
                pretrain_adv + pretrain_benign,
                global_step=global_step,
                var_list=pretrain_var_list)
            train_op = cifar10.train(loss, global_step, learning_rate,
                                     _var_list=train_var_list)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(kernel1.initializer)

        dp_epsilon = 1.0
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        delta_r = fgsm_eps * (image_size**2)
        _sensitivityW = sess.run(sensitivity)
        delta_h = _sensitivityW * (14**2)
        #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
        #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
        dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + \
                  (2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon)

        dynamic_eps = tf.placeholder(tf.float32)
        """y_test = inference(x, FM_h, params)
        softmax_y = tf.nn.softmax(y_test);
        c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0)
        x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])"""

        attack_switch = {'fgsm': True, 'ifgsm': True, 'deepfool': False,
                         'mim': True, 'spsa': False, 'cwl2': False,
                         'madry': True, 'stm': False}

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size,
            adv_noise=adv_noise)

        # Define each attack method's tensor.
        mu_alpha = tf.placeholder(tf.float32, [1])
        attack_tensor_dict = {}

        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha,
                                                clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with
        # no random init). Defaults: eps_iter=0.05, nb_iter=10.
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod. Defaults: eps_iter=0.06, nb_iter=10.
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3, decay_factor=1.0,
                                              clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (projected gradient with random init, same as rand+fgsm).
        # Defaults: eps_iter=0.01, nb_iter=40.
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry

        #====================== attack =========================
        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        sess.run(init)

        # Start the queue runners.
        #tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint,
                                               sess.graph)

        # Load the most recent model.
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D / L))  # number of steps for one epoch

        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])
        #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32)
        #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3])

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])
        #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32)
        #_W_adv = np.reshape(_W_adv, [32, 32, 3])
        #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128)
        #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]);

        test_size = len(cifar10_data.test.images)
        #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt')
        #BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L)
        #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L)
        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update, L)

        emsemble_L = int(L / 3)
        preT_epochs = 100
        pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1)
        """logfile.write("pretrain: \n")
        for step in range(_global_step, _global_step + pre_T):
            d_eps = random.random()*0.5;
            batch = cifar10_data.train.next_batch(L); #Get a random batch.
            adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
            for iter in range(0, 2):
                adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})
            #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h});
            batch = cifar10_data.train.next_batch(L);
            sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h});
            if step % int(25*step_for_epoch) == 0:
                cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128)
                logfile.write("step \t %d \t %g \n"%(step, cost_value))
                print(cost_value)
        print('pre_train finished')"""

        _global_step = 0
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            d_eps = random.random() * 0.5

            # Build an ensemble adversarial batch: one sub-batch per attack.
            batch = cifar10_data.train.next_batch(emsemble_L)  # Get a random batch.
            y_adv_batch = batch[1]
            """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
            for iter in range(0, 2):
                adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})"""
            adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'],
                                        feed_dict={x: batch[0],
                                                   adv_noise: Noise,
                                                   mu_alpha: [d_eps]})
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_mim = sess.run(attack_tensor_dict['mim'],
                                      feed_dict={x: batch[0],
                                                 adv_noise: Noise,
                                                 mu_alpha: [d_eps]})
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_madry = sess.run(attack_tensor_dict['madry'],
                                        feed_dict={x: batch[0],
                                                   adv_noise: Noise,
                                                   mu_alpha: [d_eps]})
            adv_images = np.append(np.append(adv_images_ifgsm, adv_images_mim,
                                             axis=0),
                                   adv_images_madry, axis=0)

            batch = cifar10_data.train.next_batch(L)  # Get a random batch.
            sess.run(pretrain_step, feed_dict={x: batch[0],
                                               adv_x: adv_images,
                                               adv_noise: Noise_test,
                                               noise: Noise,
                                               FM_h: perturbFM_h})
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={x: batch[0],
                                                y_: batch[1],
                                                adv_x: adv_images,
                                                adv_y_: y_adv_batch,
                                                noise: Noise,
                                                adv_noise: Noise_test,
                                                FM_h: perturbFM_h})
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # Report the result periodically.
            if step % (50 * step_for_epoch) == 0 and \
                    step >= (300 * step_for_epoch):
                '''predictions_form_argmax = np.zeros([test_size, 10])
                softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
                """for n_draws in range(0, 2000):
                    _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L)
                    _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128)
                    _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);"""
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_form_argmax[j, pred] += 2000;
                """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)"""
                final_predictions = predictions_form_argmax;
                is_correct = []
                is_robust = []
                for j in range(test_size):
                    is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j]))
                    robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult
                    is_robust.append(robustness_from_argmax >= fgsm_eps)
                acc = np.sum(is_correct)*1.0/test_size
                robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
                robust_utility = np.sum(is_robust)*1.0/test_size
                log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)'''

                #===================adv samples=====================
                log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(
                    step, total_eps)
                """adv_images_dict = {}
                for atk in attack_switch.keys():
                    if attack_switch[atk]:
                        adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict={x: cifar10_data.test.images})
                print("Done with the generating of Adversarial samples")"""
                #===================adv samples=====================
                adv_acc_dict = {}
                robust_adv_acc_dict = {}
                robust_adv_utility_dict = {}
                test_bach_size = 5000
                for atk in attack_switch.keys():
                    print(atk)
                    if atk not in adv_acc_dict:
                        adv_acc_dict[atk] = -1
                        robust_adv_acc_dict[atk] = -1
                        robust_adv_utility_dict[atk] = -1
                    if attack_switch[atk]:
                        test_bach = cifar10_data.test.next_batch(
                            test_bach_size)
                        adv_images_dict = sess.run(
                            attack_tensor_dict[atk],
                            feed_dict={x: test_bach[0],
                                       adv_noise: Noise_test,
                                       mu_alpha: [fgsm_eps]})
                        print("Done adversarial examples")

                        ### PixelDP Robustness ###
                        predictions_form_argmax = np.zeros(
                            [test_bach_size, 10])
                        softmax_predictions = sess.run(
                            softmax_y_conv,
                            feed_dict={x: adv_images_dict,
                                       noise: Noise,
                                       FM_h: perturbFM_h})
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
                        for n_draws in range(0, 1000):
                            _BenignLNoise = generateIdLMNoise(
                                image_size, Delta2, epsilon2_update, L)
                            _perturbFM_h = np.random.laplace(
                                0.0, 2 * Delta2 / (epsilon2_update * L),
                                14 * 14 * 128)
                            _perturbFM_h = np.reshape(_perturbFM_h,
                                                      [-1, 14, 14, 128])
                            if n_draws == 500:
                                print("n_draws = 500")
                            for j in range(test_bach_size):
                                pred = argmax_predictions[j]
                                predictions_form_argmax[j, pred] += 1
                            softmax_predictions = sess.run(
                                softmax_y_conv,
                                feed_dict={
                                    x: adv_images_dict,
                                    noise: (_BenignLNoise / 10 + Noise),
                                    FM_h: perturbFM_h
                                }) * sess.run(
                                    softmax_y_conv,
                                    feed_dict={
                                        x: adv_images_dict,
                                        noise: Noise,
                                        FM_h: (_perturbFM_h / 10 +
                                               perturbFM_h)
                                    })
                            #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)})
                            argmax_predictions = np.argmax(
                                softmax_predictions, axis=1)

                        final_predictions = predictions_form_argmax
                        is_correct = []
                        is_robust = []
                        for j in range(test_bach_size):
                            is_correct.append(
                                np.argmax(test_bach[1][j]) == np.argmax(
                                    final_predictions[j]))
                            robustness_from_argmax = \
                                robustness.robustness_size_argmax(
                                    counts=predictions_form_argmax[j],
                                    eta=0.05,
                                    dp_attack_size=fgsm_eps,
                                    dp_epsilon=dp_epsilon,
                                    dp_delta=0.05,
                                    dp_mechanism='laplace') / dp_mult
                            is_robust.append(
                                robustness_from_argmax >= fgsm_eps)
                        adv_acc_dict[atk] = (np.sum(is_correct) * 1.0 /
                                             test_bach_size)
                        robust_adv_acc_dict[atk] = np.sum(
                            [a and b for a, b in zip(is_robust, is_correct)]
                        ) * 1.0 / np.sum(is_robust)
                        robust_adv_utility_dict[atk] = (np.sum(is_robust) *
                                                        1.0 / test_bach_size)
                        ##############################

                for atk in attack_switch.keys():
                    if attack_switch[atk]:
                        # added robust prediction
                        log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                            atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                            robust_adv_utility_dict[atk],
                            robust_adv_acc_dict[atk] *
                            robust_adv_utility_dict[atk])
                print(log_str)
                logfile.write(log_str + '\n')

            # Save the model checkpoint periodically.
            if step % (10 * step_for_epoch) == 0 and (step > _global_step):
                num_examples_per_step = L
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
            """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):"""
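# The certification loop above follows the PixelDP recipe: tally argmax votes
# over many noisy forward passes, then compare the certified radius with the
# attack budget. A stripped-down, hypothetical helper showing just that tally
# (the packaging is ours; `robustness` is the same module used above):
def certified_metrics(votes, labels, fgsm_eps, dp_mult, dp_epsilon=1.0):
    """votes: [N, n_classes] vote counts; labels: one-hot [N, n_classes]."""
    is_correct, is_robust = [], []
    for counts, label in zip(votes, labels):
        is_correct.append(np.argmax(counts) == np.argmax(label))
        radius = robustness.robustness_size_argmax(
            counts=counts, eta=0.05, dp_attack_size=fgsm_eps,
            dp_epsilon=dp_epsilon, dp_delta=0.05,
            dp_mechanism='laplace') / dp_mult
        is_robust.append(radius >= fgsm_eps)
    acc = np.mean(is_correct)
    robust_utility = np.mean(is_robust)
    robust_acc = (np.sum([a and b for a, b in zip(is_robust, is_correct)]) /
                  max(np.sum(is_robust), 1))
    return acc, robust_acc, robust_utility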
def test():
    """Evaluate a trained RDPCNN on clean and adversarial CIFAR-100 inputs."""
    tf.reset_default_graph()
    g = tf.get_default_graph()

    with g.as_default():
        # Placeholder nodes.
        images_holder = tf.placeholder(
            tf.float32,
            [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
        label_holder = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        is_training = tf.placeholder(tf.bool, ())

        # Model, plus loss/accuracy used for adversarial examples.
        model = model_cifar100.RDPCNN(images_holder, label_holder,
                                      FLAGS.INPUT_SIGMA, is_training)
        model_loss = model.loss()
        model_acc = model.cnn_accuracy

        # Wrappers exposing the model to CleverHans.
        def inference(x):
            logits, _ = model.cnn.prediction(x)
            return logits

        def inference_prob(x):
            _, probs = model.cnn.prediction(x)
            return probs

        graph_dict = {}
        graph_dict["images_holder"] = images_holder
        graph_dict["label_holder"] = label_holder
        graph_dict["is_training"] = is_training

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config, graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            # Load the trained model.
            model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

            # Adversarial test
            ##################################################################
            x_advs = {}
            ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                                   output_layer='logits')
            ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                                  output_layer='probs')

            # FastGradientMethod
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            x_advs["fgsm"] = fgsm_obj.generate(x=images_holder,
                                               eps=FLAGS.ATTACK_SIZE,
                                               clip_min=0.0, clip_max=1.0)

            # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod
            # with no random init). Defaults: eps_iter=0.05, nb_iter=10.
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            x_advs["ifgsm"] = ifgsm_obj.generate(
                x=images_holder, eps=FLAGS.ATTACK_SIZE,
                eps_iter=FLAGS.ATTACK_SIZE / 10, nb_iter=10,
                clip_min=0.0, clip_max=1.0)

            # MomentumIterativeMethod. Defaults: eps_iter=0.06, nb_iter=10.
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            x_advs["mim"] = mim_obj.generate(
                x=images_holder, eps=FLAGS.ATTACK_SIZE,
                eps_iter=FLAGS.ATTACK_SIZE / 10, nb_iter=10,
                decay_factor=1.0, clip_min=0.0, clip_max=1.0)

            # MadryEtAl (projected gradient with random init, same as
            # rand+fgsm). Defaults: eps_iter=0.01, nb_iter=40.
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            x_advs["madry"] = madry_obj.generate(
                x=images_holder, eps=FLAGS.ATTACK_SIZE,
                eps_iter=FLAGS.ATTACK_SIZE / 10, nb_iter=10,
                clip_min=0.0, clip_max=1.0)
            graph_dict["x_advs"] = x_advs
            ##################################################################

            # tensorboard writer
            #test_writer = model_utils.init_writer(FLAGS.TEST_LOG_PATH, g)
            print("\nTest")
            if FLAGS.local:
                total_test_batch = 2
            else:
                total_test_batch = None
            dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
            test_info(sess, model, True, graph_dict, dp_info,
                      FLAGS.TEST_LOG_FILENAME, total_batch=total_test_batch)
            robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
def setUp(self):
    super(TestMadryEtAl, self).setUp()
    self.sess = tf.Session()
    self.model = SimpleModel()
    self.attack = MadryEtAl(self.model, sess=self.sess)
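# SimpleModel is defined elsewhere in the test module. A minimal callable
# stand-in consistent with this setUp (an assumption, mirroring the tiny
# fixed-weight two-layer network used by the other setUp variant above):
class SimpleModel(object):
    """Tiny fixed-weight two-layer network exposing logits via __call__."""

    def __call__(self, x):
        W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
        h1 = tf.nn.sigmoid(tf.matmul(x, W1))
        W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
        return tf.matmul(h1, W2)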
def main(args):
    # ImageNet-style preprocessing for the pretrained ResNet-50.
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])
    dataset = ImageDataset(args.image_folder, transform=transform,
                           return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset, shuffle=False,
                            batch_size=args.batch_size,
                            pin_memory=True, num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224))
    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # Compute clip_min and clip_max using a full black and a full white image.
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord

    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    y, target = None, None  # only used by the targeted L-BFGS attack
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        # Pick one random target class for the whole run.
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}

    attack_name = args.attack + attack_name
    print('Running [{}]. Params: {}'.format(args.attack.upper(),
                                            attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:
        progress.set_description('ATTACK')
        # Only the targeted L-BFGS attack needs the target label fed in.
        feed_dict = {x_op: x}
        if y is not None:
            feed_dict[y] = target
        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict=feed_dict)

        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]
        progress.set_postfix(
            {'Success': '{:3.2%}'.format(n_success / n_processed)})

        progress.set_description('SAVING')
        for p, a, s, d in zip(success_paths, success_adv_x, success_src,
                              success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
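# Hedged sketch of the argparse wiring this main() expects; the flag names
# are inferred from the attributes accessed above (args.image_folder,
# args.out_folder, args.batch_size, args.device, args.eps, args.ord,
# args.attack) and the defaults are illustrative only.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Attack a pretrained ResNet-50 with CleverHans')
    parser.add_argument('image_folder')
    parser.add_argument('out_folder')
    parser.add_argument('--batch-size', dest='batch_size', type=int,
                        default=16)
    parser.add_argument('--device', default='cuda')
    parser.add_argument('--eps', type=float, default=8.)   # in /255 units
    parser.add_argument('--ord', type=float, default=-1.)  # <0 maps to np.inf
    parser.add_argument('--attack', default='pgd',
                        choices=['fgsm', 'iter', 'm-iter', 'pgd', 'jsma',
                                 'deepfool', 'cw', 'lbfgs'])
    main(parser.parse_args())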
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile,
          parameter_dict):
    """Train CIFAR-10 for a number of steps."""
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    # make sure variables are placed on cpu
    # TODO: for AWS version, check if put variables on GPU will be better
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, trainable=False)

        attacks = ['ifgsm', 'mim', 'madry']

        # manually create all scopes
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope:
            scope_conv1 = scope
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope:
            scope_conv2 = scope
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope:
            scope_conv3 = scope
        with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope:
            scope_local4 = scope
        with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope:
            scope_local5 = scope

        # Parameters Declarification
        # with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])):
        with tf.variable_scope(scope_conv1) as scope:
            kernel1 = _variable_with_weight_decay(
                'kernel1', shape=[4, 4, 3, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0, collect=[AECODER_VARIABLES])
            biases1 = _bias_on_cpu('biases1', [128],
                                   tf.constant_initializer(0.0),
                                   collect=[AECODER_VARIABLES])
            # layer sensitivity from the largest singular value of kernel1
            shape = kernel1.get_shape().as_list()
            w_t = tf.reshape(kernel1, [-1, shape[-1]])
            w = tf.transpose(w_t)
            sing_vals = tf.svd(w, compute_uv=False)
            sensitivity = tf.reduce_max(sing_vals)
            gamma = 2 * Delta2 / (L * sensitivity)

        with tf.variable_scope(scope_conv2) as scope:
            kernel2 = _variable_with_weight_decay(
                'kernel2', shape=[5, 5, 128, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0, collect=[CONV_VARIABLES])
            biases2 = _bias_on_cpu('biases2', [128],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_conv3) as scope:
            kernel3 = _variable_with_weight_decay(
                'kernel3', shape=[5, 5, 256, 256],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0, collect=[CONV_VARIABLES])
            biases3 = _bias_on_cpu('biases3', [256],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local4) as scope:
            kernel4 = _variable_with_weight_decay(
                'kernel4', shape=[int(image_size / 4)**2 * 256, hk],
                stddev=0.04, wd=0.004, collect=[CONV_VARIABLES])
            biases4 = _bias_on_cpu('biases4', [hk],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local5) as scope:
            kernel5 = _variable_with_weight_decay(
                'kernel5', [hk, 10],
                stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
                math.ceil(5 / 2),
                wd=0.0, collect=[CONV_VARIABLES])
            biases5 = _bias_on_cpu('biases5', [10],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        # group these for use as parameters
        params = [kernel1, biases1, kernel2, biases2, kernel3, biases3,
                  kernel4, biases4, kernel5, biases5]
        scopes = [scope_conv1, scope_conv2, scope_conv3, scope_local4,
                  scope_local5]

        # placeholders for input values
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])  # one time
        noise = tf.placeholder(tf.float32,
                               [None, image_size, image_size, 3])  # one time
        adv_noise = tf.placeholder(
            tf.float32, [None, image_size, image_size, 3])  # one time

        # input is the bunch of n_batchs
        x_sb = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        x_list = tf.split(x_sb, N_GPUS, axis=0)  # split it into each batch
        adv_x_sb = tf.placeholder(tf.float32,
                                  [None, image_size, image_size, 3])
        adv_x_list = tf.split(adv_x_sb, N_GPUS, axis=0)
        x_test = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        y_sb = tf.placeholder(tf.float32, [None, 10])  # the bunch of n_batchs
        y_list = tf.split(y_sb, N_GPUS, axis=0)  # split it into each batch
        adv_y_sb = tf.placeholder(tf.float32, [None, 10])
        # adv_y_list = tf.split(adv_y_sb, N_GPUS, axis=0)
        y_test = tf.placeholder(tf.float32, [None, 10])

        # re-arrange the input samples
        _split_adv_y_sb = tf.split(adv_y_sb, N_AUX_GPUS, axis=0)
        reorder_adv_y_sb = []
        for i in range(N_GPUS):
            reorder_adv_y_sb.append(
                tf.concat([_split_adv_y_sb[i + N_GPUS * atk_index]
                           for atk_index in range(len(attacks))], axis=0))

        tower_pretrain_grads = []
        tower_train_grads = []
        all_train_loss = []

        pretrain_opt = tf.train.AdamOptimizer(learning_rate)
        train_opt = tf.train.GradientDescentOptimizer(learning_rate)

        # batch index
        bi = 0
        for gpu in GPU_IDX:
            # putting ops on each tower (GPU)
            with tf.device('/gpu:{}'.format(gpu)):
                print('Train inference GPU placement')
                print('/gpu:{}'.format(gpu))
                # Auto-Encoder: pretrain_adv and pretrain_benign are cost
                # tensors of the encoding layer
                with tf.variable_scope(scope_conv1) as scope:
                    Enc_Layer2 = EncLayer(inpt=adv_x_list[bi], n_filter_in=3,
                                          n_filter_out=128, filter_size=3,
                                          W=kernel1, b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_adv = Enc_Layer2.get_train_ops2(
                        xShape=tf.shape(adv_x_list[bi])[0], Delta=Delta2,
                        epsilon=epsilon2, batch_size=L,
                        learning_rate=learning_rate, W=kernel1, b=biases1,
                        perturbFMx=adv_noise, perturbFM_h=FM_h, bn_index=bi)
                    Enc_Layer3 = EncLayer(inpt=x_list[bi], n_filter_in=3,
                                          n_filter_out=128, filter_size=3,
                                          W=kernel1, b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_benign = Enc_Layer3.get_train_ops2(
                        xShape=tf.shape(x_list[bi])[0], Delta=Delta2,
                        epsilon=epsilon2, batch_size=L,
                        learning_rate=learning_rate, W=kernel1, b=biases1,
                        perturbFMx=noise, perturbFM_h=FM_h, bn_index=bi)
                    pretrain_cost = pretrain_adv + pretrain_benign
                    # this cost is not used:
                    # cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost)/2.0)

                # benign conv output
                x_image = x_list[bi] + noise
                y_conv = inference(x_image, FM_h, params, scopes,
                                   training=True, bn_index=bi)
                # softmax_y_conv = tf.nn.softmax(y_conv)

                # adv conv output
                adv_x_image = adv_x_list[bi] + adv_noise
                y_adv_conv = inference(adv_x_image, FM_h, params, scopes,
                                       training=True, bn_index=bi)

                # Calculate loss. Apply Taylor Expansion for the output layer.
                perturbW = perturbFM * params[8]
                train_loss = cifar10.TaylorExp(y_conv, y_list[bi], y_adv_conv,
                                               reorder_adv_y_sb[bi], L, alpha,
                                               perturbW)
                all_train_loss.append(train_loss)

                # lists of variables to train
                pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
                train_var_list = tf.get_collection(CONV_VARIABLES)

                # compute tower gradients; these lists contain (grad, var)
                pretrain_grads = pretrain_opt.compute_gradients(
                    pretrain_cost, var_list=pretrain_var_list)
                train_grads = train_opt.compute_gradients(
                    train_loss, var_list=train_var_list)
                # get_pretrain_grads(pretrain_cost, global_step, learning_rate, pretrain_var_list)
                # train_grads = get_train_grads(train_loss, global_step, learning_rate, train_var_list)
                tower_pretrain_grads.append(pretrain_grads)
                tower_train_grads.append(train_grads)

                # batch index
                bi += 1

        # average the gradients from each tower
        pretrain_var_dict = {}
        all_pretrain_grads = {}
        avg_pretrain_grads = []
        for var in tf.get_collection(AECODER_VARIABLES):
            if var.name not in all_pretrain_grads:
                all_pretrain_grads[var.name] = []
                pretrain_var_dict[var.name] = var
        for tower in tower_pretrain_grads:
            for var_grad in tower:
                all_pretrain_grads[var_grad[1].name].append(var_grad[0])
        for var_name in all_pretrain_grads:
            # expand dim 0, then concat on dim 0, then reduce mean on dim 0
            expand_pretrain_grads = [tf.expand_dims(g, 0)
                                     for g in all_pretrain_grads[var_name]]
            concat_pretrain_grads = tf.concat(expand_pretrain_grads, axis=0)
            reduce_pretrain_grads = tf.reduce_mean(concat_pretrain_grads, 0)
            # rebuild the (grad, var) list
            avg_pretrain_grads.append(
                (reduce_pretrain_grads, pretrain_var_dict[var_name]))
        print('*****************************')
        print("avg_pretrain_grads:")
        for avg_pretrain_grad in avg_pretrain_grads:
            print('grads')
            print((avg_pretrain_grad[0].name, avg_pretrain_grad[0].shape))
            print('var')
            print((avg_pretrain_grad[1].name, avg_pretrain_grad[1].shape))
            print('------')

        train_var_dict = {}
        all_train_grads = {}
        avg_train_grads = []
        for var in tf.get_collection(CONV_VARIABLES):
            if var.name not in all_train_grads:
                all_train_grads[var.name] = []
                train_var_dict[var.name] = var
        for tower in tower_train_grads:
            for var_grad in tower:
                all_train_grads[var_grad[1].name].append(var_grad[0])
        for var_name in all_train_grads:
            # expand dim 0, then concat on dim 0, then reduce mean on dim 0
            expand_train_grads = [tf.expand_dims(g, 0)
                                  for g in all_train_grads[var_name]]
            concat_train_grads = tf.concat(expand_train_grads, axis=0)
            reduce_train_grads = tf.reduce_mean(concat_train_grads, 0)
            # rebuild the (grad, var) list
            avg_train_grads.append(
                (reduce_train_grads, train_var_dict[var_name]))
        print('*****************************')
        print("avg_train_grads:")
        for avg_train_grad in avg_train_grads:
            print('grads')
            print((avg_train_grad[0].name, avg_train_grad[0].shape))
            print('var')
            print((avg_train_grad[1].name, avg_train_grad[1].shape))
            print('------')
        print('*****************************')

        # get the averaged loss tensor
        avg_loss = tf.reduce_mean(tf.stack(all_train_loss), axis=0)

        # TODO: take the average of the bn variables from each tower/training
        # GPU; currently, testing uses the bn variables on bn_index 0
        # (tower/training GPU 0).

        # build train ops (apply averaged gradients to variables);
        # according to the 1.13 docs, updates need to be manually applied
        _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print('update ops:')
        print(_update_ops)
        with tf.control_dependencies(_update_ops):
            pretrain_op = pretrain_opt.apply_gradients(
                avg_pretrain_grads, global_step=global_step)
            train_op = train_opt.apply_gradients(avg_train_grads,
                                                 global_step=global_step)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # init kernel1 and derive some values from it
        sess.run(kernel1.initializer)
        dp_epsilon = 0.005
        parameter_dict['dp_epsilon'] = dp_epsilon
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        parameter_dict['epsilon2_update'] = epsilon2_update
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        # NOTE: these values need to be recalculated in testing
        delta_r = fgsm_eps * (image_size**2)
        parameter_dict['delta_r'] = delta_r
        _sensitivityW = sess.run(sensitivity)
        parameter_dict['_sensitivityW'] = _sensitivityW
        delta_h = _sensitivityW * (14**2)
        parameter_dict['delta_h'] = delta_h
        #dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
        dp_mult = (Delta2) / (L * epsilon2_update * (delta_h / 2 + delta_r))
        parameter_dict['dp_mult'] = dp_mult

        # place test-time inference on the CPU
        with tf.device('/cpu:0'):
            # testing pipeline
            test_x_image = x_test + noise
            test_y_conv = inference(test_x_image, FM_h, params, scopes,
                                    training=True, bn_index=0)
            test_softmax_y_conv = tf.nn.softmax(test_y_conv)

        # ============== attacks ================
        iter_step_training = 3
        parameter_dict['iter_step_training'] = iter_step_training
        # iter_step_testing = 1000
        aux_dup_count = N_GPUS
        # split input x_super_batch into N_AUX_GPUS parts
        x_attacks = tf.split(x_sb, N_AUX_GPUS, axis=0)
        # split input x_test into aux_dup_count parts
        x_test_split = tf.split(x_test, aux_dup_count, axis=0)

        # setup all attacks
        # attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs, output_layer='probs',
            params=params, scopes=scopes, image_size=image_size,
            adv_noise=adv_noise)
        attack_tensor_training_dict = {}
        attack_tensor_testing_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])

        # build each attack, assigning it to its own group of auxiliary GPUs
        for atk_idx in range(len(attacks)):
            atk = attacks[atk_idx]
            print('building attack {} tensors'.format(atk))
            attack_tensor_training_dict[atk] = []
            attack_tensor_testing_dict[atk] = []
            for i in range(aux_dup_count):
                if atk == 'ifgsm':
                    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[i])):
                        print('ifgsm GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i]))
                        # ifgsm tensors for training
                        ifgsm_obj = BasicIterativeMethod(
                            model=ch_model_probs, sess=sess)
                        attack_tensor_training_dict[atk].append(
                            ifgsm_obj.generate(
                                x=x_attacks[i], eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                clip_min=-1.0, clip_max=1.0))
                elif atk == 'mim':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count])):
                        print('mim GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count]))
                        # mim tensors for training
                        mim_obj = MomentumIterativeMethod(
                            model=ch_model_probs, sess=sess)
                        attack_tensor_training_dict[atk].append(
                            mim_obj.generate(
                                x=x_attacks[i + 1 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                decay_factor=1.0,
                                clip_min=-1.0, clip_max=1.0))
                elif atk == 'madry':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count])):
                        print('madry GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count]))
                        # madry tensors for training
                        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                        attack_tensor_training_dict[atk].append(
                            madry_obj.generate(
                                x=x_attacks[i + 2 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                clip_min=-1.0, clip_max=1.0))

        # combine all attack tensors; the result holds each batch of adv
        # samples for training, in the same order as the labels
        adv_concat_list = []
        for i in range(aux_dup_count):
            adv_concat_list.append(
                tf.concat([attack_tensor_training_dict[atk][i]
                           for atk in attacks], axis=0))
        adv_super_batch_tensor = tf.concat(adv_concat_list, axis=0)

        #====================== attack =========================
        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        print('******************** debug info **********************')
        # list of variables to train
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        print('pretrain var list')
        for v in pretrain_var_list:
            print((v.name, v.shape))
        print('**********************************')
        train_var_list = tf.get_collection(CONV_VARIABLES)
        print('train var list')
        for v in train_var_list:
            print((v.name, v.shape))
        print('**********************************')
        # all variables
        print('all variables')
        vl = tf.global_variables()
        for v in vl:
            print((v.name, v.shape))
        print('**********************************')
        # all ops
        ops = [n.name for n in tf.get_default_graph().as_graph_def().node]
        print('total number of ops')
        print(len(ops))
        # for op in ops:
        #     print(op)
        print('******************** debug info **********************')
        # exit()

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        sess.run(init)

        # load the most recent model
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps
        print('total number of steps: {}'.format(T))
        step_for_epoch = int(math.ceil(D / L))  # number of steps for one epoch
        parameter_dict['step_for_epoch'] = step_for_epoch
        print('step_for_epoch: {}'.format(step_for_epoch))

        # generate some fixed noise
        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)  # one time
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])
        parameter_dict['perturbH_test'] = perturbH_test
        print('perturbH_test')
        print(perturbH_test.shape)

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)  # one time
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])
        parameter_dict['perturbFM_h'] = perturbFM_h
        print('perturbFM_h')
        print(perturbFM_h.shape)

        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        parameter_dict['Noise'] = Noise
        Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update, L)
        parameter_dict['Noise_test'] = Noise_test
        print('Noise and Noise_test')
        print(Noise.shape)
        print(Noise_test.shape)
        # exit()

        # some timing variables
        adv_duration_total = 0.0
        adv_duration_count = 0
        train_duration_total = 0.0
        train_duration_count = 0

        # some debug flags
        adv_batch_flag = True
        batch_flag = True
        L_flag = True
        parameter_flag = True

        _global_step = 0
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            # TODO: fix this
            d_eps = random.random() * 0.5
            # d_eps = 0.25
            print('d_eps: {}'.format(d_eps))

            # version with 3 AUX GPUs: get two super batches, one for benign
            # training, one for adv training
            super_batch_images, super_batch_labels = \
                cifar10_data.train.next_super_batch(N_GPUS, random=True)
            super_batch_images_for_adv, super_batch_adv_labels = \
                cifar10_data.train.next_super_batch_premix_ensemble(
                    N_GPUS, random=True)

            # TODO: re-arrange the adv labels to match the adv samples
            # run adv_super_batch_tensor to generate the adv samples
            super_batch_adv_images = sess.run(
                adv_super_batch_tensor,
                feed_dict={x_sb: super_batch_images_for_adv,
                           adv_noise: Noise,
                           mu_alpha: [d_eps]})
            adv_finish_time = time.time()
            adv_duration = adv_finish_time - start_time
            adv_duration_total += adv_duration
            adv_duration_count += 1

            if adv_batch_flag:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                print(super_batch_adv_images.shape)
                print(super_batch_adv_labels.shape)
                adv_batch_flag = False

            if batch_flag:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                batch_flag = False

            if L_flag:
                print("L: {}".format(L))
                L_flag = False

            if parameter_flag:
                print('*=*=*=*=*')
                print(parameter_dict)
                print('*=*=*=*=*', flush=True)
                logfile.write('*=*=*=*=*\n')
                logfile.write(str(parameter_dict))
                logfile.write('*=*=*=*=*\n')
                parameter_flag = False

            _, _, avg_loss_value = sess.run(
                [pretrain_op, train_op, avg_loss],
                feed_dict={x_sb: super_batch_images,
                           y_sb: super_batch_labels,
                           adv_x_sb: super_batch_adv_images,
                           adv_y_sb: super_batch_adv_labels,
                           noise: Noise,
                           adv_noise: Noise_test,
                           FM_h: perturbFM_h})
            assert not np.isnan(avg_loss_value), \
                'Model diverged with loss = NaN'

            train_finish_time = time.time()
            train_duration = train_finish_time - adv_finish_time
            train_duration_total += train_duration
            train_duration_count += 1

            # save the model every 50 epochs
            if step % (50 * step_for_epoch) == 0 and \
                    (step >= 50 * step_for_epoch):
                print('saving model')
                checkpoint_path = os.path.join(os.getcwd() + dirCheckpoint,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            # print progress periodically
            # if step % (10*step_for_epoch) == 0 and (step > _global_step):
            if step % 10 == 0 and (step > _global_step):
                # print n steps and time
                print("current epoch: {:.2f}".format(step / step_for_epoch))
                num_examples_per_step = L * N_GPUS * 2
                avg_adv_duration = adv_duration_total / adv_duration_count
                avg_train_duration = (train_duration_total /
                                      train_duration_count)
                avg_total_duration = avg_adv_duration + avg_train_duration
                examples_per_sec = num_examples_per_step / avg_total_duration
                sec_per_step = avg_total_duration
                # sec_per_batch = sec_per_step / (N_GPUS * 2)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.2f sec/step; '
                              '%.2f sec/adv_gen_op; %.2f sec/train_op)')
                actual_str = format_str % (
                    datetime.now(), step, avg_loss_value, examples_per_sec,
                    sec_per_step, avg_adv_duration, avg_train_duration)
                print(actual_str, flush=True)
                logfile.write(actual_str + '\n')
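# The per-variable averaging above is the standard multi-tower pattern:
# gather each tower's (grad, var) pairs, stack the grads, and take their
# mean. A compact standalone sketch of the same idea (hypothetical helper,
# assuming every tower yields grads for the same variables in the same order):
def average_tower_grads(tower_grads):
    """tower_grads: one list of (grad, var) pairs per GPU tower."""
    averaged = []
    for pairs in zip(*tower_grads):  # pairs: one (grad, var) per tower
        grads = tf.concat([tf.expand_dims(g, 0) for g, _ in pairs], axis=0)
        averaged.append((tf.reduce_mean(grads, axis=0), pairs[0][1]))
    return averaged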
attack_params = {
    'batch_size': eval_batch_size,
    'eps': config['epsilon'],
    'eps_iter': config['step_size'],
    'nb_iter': config['num_steps'],
    'clip_min': 0.,
    'clip_max': 255.,
    'ord': np.inf
}

#from cleverhans.attacks import FastGradientMethod
#attacker = FastGradientMethod(model, back='tf', sess=sess)
from cleverhans.attacks import MadryEtAl
attacker = MadryEtAl(model, back='tf', sess=sess)

max_eps = 16
epsilons = np.linspace(1, max_eps, max_eps)
#epsilons = np.linspace(0, max_eps, max_eps // 4, endpoint=False)

eval_par = {'batch_size': eval_batch_size}
for e in epsilons:
    start_time = time.time()
    attack_params.update({'eps': e})
    x_adv = attacker.generate(x, **attack_params)
    preds_adv = model.get_probs(x_adv)
    acc = model_eval(sess, x, y, preds_adv, X_test[:nb_samples],
                     Y_test[:nb_samples], args=eval_par)  # trailing args assumed; the snippet breaks off mid-call
    print('eps = %.0f: adv accuracy %.4f (%.1fs)'
          % (e, acc, time.time() - start_time))  # assumed reporting line
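# The sweep above reads its attack hyperparameters from a `config` mapping;
# a minimal example of the fields it touches (values illustrative only):
config = {'epsilon': 8.0,    # L-inf budget, overwritten per sweep point
          'step_size': 2.0,  # PGD step size (eps_iter)
          'num_steps': 10}   # PGD iterations (nb_iter)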
def main(argv=None): """ CIFAR10 CleverHans tutorial :return: """ # CIFAR10-specific dimensions img_rows = 32 img_cols = 32 channels = 3 nb_classes = 10 # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") # Image dimensions ordering should follow the Theano convention if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) set_log_level(logging.WARNING) # Get CIFAR10 test data X_train, Y_train, X_test, Y_test = data_cifar10() assert Y_train.shape[1] == 10. label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels)) y = tf.placeholder(tf.float32, shape=(None, 10)) phase = tf.placeholder(tf.bool, name="phase") model_path = FLAGS.model_path targeted = True if FLAGS.targeted else False binary = True if FLAGS.binary else False scale = True if FLAGS.scale else False learning_rate = FLAGS.learning_rate nb_filters = FLAGS.nb_filters batch_size = FLAGS.batch_size nb_samples = FLAGS.nb_samples nb_epochs = FLAGS.nb_epochs delay = FLAGS.delay eps = FLAGS.eps adv = FLAGS.adv attack = FLAGS.attack attack_iterations = FLAGS.attack_iterations save = False train_from_scratch = False if model_path is not None: if os.path.exists(model_path): # check for existing model in immediate subfolder if any(f.endswith('.meta') for f in os.listdir(model_path)): binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings( model_path) train_from_scratch = False else: model_path = build_model_save_path( model_path, binary, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay, scale) print(model_path) save = True train_from_scratch = True else: train_from_scratch = True # train from scratch, but don't save since no path given if binary: if scale: from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn model = make_scaled_binary_cnn(phase, 'bin_', input_shape=( None, img_rows, img_cols, channels), nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn model = make_basic_binary_cnn(phase, 'bin_', input_shape=( None, img_rows, img_cols, channels), nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_basic_cnn model = make_basic_cnn(phase, 'fp_', input_shape=( None, img_rows, img_cols, channels), nb_filters=nb_filters) preds = model(x, reuse=False) print("Defined TensorFlow model graph.") rng = np.random.RandomState([2017, 8, 30]) def evaluate(): # Evaluate the accuracy of the CIFAR10 model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params) assert X_test.shape[0] == 10000, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an CIFAR10 model train_params = { 'binary': binary, 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'loss_name': 'train loss', 'filename': 'model', 'reuse_global_step': False, 'train_scope': 'train', 'is_training': True } if adv: from cleverhans.attacks import FastGradientMethod fgsm = 
FastGradientMethod(model, back='tf', sess=sess) fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.} adv_x_train = fgsm.generate(x, phase, **fgsm_params) preds_adv = model.get_probs(adv_x_train) if train_from_scratch: if save: train_params.update({'log_dir': model_path}) if adv and delay > 0: train_params.update({'nb_epochs': delay}) # do clean training for 'nb_epochs' or 'delay' epochs model_train(sess, x, y, preds, X_train, Y_train, phase=phase, evaluate=evaluate, args=train_params, save=save, rng=rng) # optionally do additional adversarial training if adv: print("Adversarial training for %d epochs" % (nb_epochs - delay)) train_params.update({'nb_epochs': nb_epochs - delay}) train_params.update({'reuse_global_step': True}) model_train(sess, x, y, preds, X_train, Y_train, phase=phase, predictions_adv=preds_adv, evaluate=evaluate, args=train_params, save=save, rng=rng) else: tf_model_load(sess, model_path) print('Restored model from %s' % model_path) evaluate() # Evaluate the accuracy of the CIFAR10 model on legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase, feed={phase: False}, args=eval_params) print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) ########################################################################### # Build dataset ########################################################################### if targeted: from cleverhans.utils import build_targeted_dataset adv_inputs, true_labels, adv_ys = build_targeted_dataset( X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows, img_cols, channels) else: adv_inputs = X_test[:nb_samples] ########################################################################### # Craft adversarial examples using generic approach ########################################################################### if targeted: att_batch_size = np.clip( nb_samples * (nb_classes - 1), a_max=MAX_BATCH_SIZE, a_min=1) nb_adv_per_sample = nb_classes - 1 yname = "y_target" else: att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE) nb_adv_per_sample = 1 adv_ys = None yname = "y" print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) + ' adversarial examples') print("This could take some time ...") if attack == ATTACK_CARLINI_WAGNER_L2: from cleverhans.attacks import CarliniWagnerL2 attacker = CarliniWagnerL2(model, back='tf', sess=sess) attack_params = {'binary_search_steps': 1, 'max_iterations': attack_iterations, 'learning_rate': 0.1, 'batch_size': att_batch_size, 'initial_const': 10, } elif attack == ATTACK_JSMA: from cleverhans.attacks import SaliencyMapMethod attacker = SaliencyMapMethod(model, back='tf', sess=sess) attack_params = {'theta': 1., 'gamma': 0.1} elif attack == ATTACK_FGSM: from cleverhans.attacks import FastGradientMethod attacker = FastGradientMethod(model, back='tf', sess=sess) attack_params = {'eps': eps} elif attack == ATTACK_MADRYETAL: from cleverhans.attacks import MadryEtAl attacker = MadryEtAl(model, back='tf', sess=sess) attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter} else: print("Attack undefined") sys.exit(1) attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.}) X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params) ''' adv_x = attacker.generate(x, phase, **attack_params) # Craft adversarial examples using Fast Gradient Sign Method (FGSM) eval_params = {'batch_size': att_batch_size} X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={ phase: 
False}, args=eval_params) ''' if targeted: assert X_test_adv.shape[0] == nb_samples * (nb_classes - 1), X_test_adv.shape # Evaluate the accuracy of the CIFAR10 model on adversarial examples print("Evaluating targeted results") adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, true_labels, phase=phase, args=eval_params) else: assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape # Evaluate the accuracy of the CIFAR10 model on adversarial examples print("Evaluating untargeted results") adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, Y_test, phase=phase, args=eval_params) # Compute the number of adversarial examples that were successfully found print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy)) # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(np.sum((X_test_adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Friendly output for pasting into spreadsheet print('{0:.4f},'.format(accuracy)) print('{0:.4f},'.format(adv_accuracy)) print('{0:.4f},'.format(percent_perturbed)) sess.close() ''' print("Repeating the process, using adversarial training") def evaluate_2(): # Evaluate the accuracy of the adversarially trained CIFAR10 model on # legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params) print('Test accuracy on legitimate test examples: ' + str(accuracy)) # Evaluate the accuracy of the adversarially trained CIFAR10 model on # adversarial examples accuracy_adv = model_eval(sess, x, y, preds_adv, X_test, Y_test, phase=phase, args=eval_params) print('Test accuracy on adversarial examples: ' + str(accuracy_adv)) # Perform adversarial training train_params.update({'reuse_global_step': True}) model_train(sess, x, y, preds, X_train, Y_train, phase=phase, predictions_adv=preds_adv, evaluate=evaluate_2, args=train_params) '''
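For reference, the "Avg. L_2 norm" figure printed above is the mean per-example Euclidean norm of the perturbation. A tiny self-contained numpy check of the same formula (the arrays here are synthetic stand-ins, not from the script):

import numpy as np

# Synthetic stand-ins for clean and adversarial batches of CIFAR10 shape.
clean = np.zeros((4, 32, 32, 3), dtype=np.float32)
adv = clean + 0.01  # flat perturbation of 0.01 on every pixel

# Flatten each image, take the Euclidean norm, then average over the batch.
l2_per_example = np.sqrt(np.sum((adv - clean) ** 2, axis=(1, 2, 3)))
print('Avg. L_2 norm of perturbations %.4f' % l2_per_example.mean())
# 32*32*3 = 3072 pixels at 0.01 each gives sqrt(3072)*0.01 ~= 0.5543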
def main(_): tf.logging.set_verbosity(tf.logging.DEBUG) # Images for inception classifier are normalized to be in the [-1, 1] interval. num_classes = 1001 batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] # Load ImageNet Class Labels with open('labels.json') as f: labels = json.load(f) # Prepare Graph with tf.Graph().as_default(): # Build Model if FLAGS.model_arch.lower() == 'resnet_v2_101': model = models.Resnet_V2_101_Model(num_classes) exceptions = [] elif FLAGS.model_arch.lower() == 'inception_v3': model = models.Inception_V3_Model(num_classes) exceptions = ['InceptionV3/AuxLogits.*'] else: raise ValueError('Invalid model architecture specified: {}'.format( FLAGS.model_arch)) # Define Model Variables x_input = tf.placeholder(tf.float32, shape=batch_shape) FastGradientMethod(model).generate(x_input) model_variables = tf.contrib.framework.filter_variables( slim.get_model_variables(), exclude_patterns=exceptions) # Load Session saver = tf.train.Saver(model_variables) with tf.train.SessionManager().prepare_session( master=FLAGS.master, checkpoint_filename_with_path=FLAGS.checkpoint_path, saver=saver) as sess: # For Targeted Attacks target_idx = 0 # This will vary target = tf.one_hot(tf.fill([FLAGS.batch_size], target_idx), num_classes) # one-hot targets for the chosen class # Build Attack if FLAGS.attack_type.lower() == 'fgsm': fgsm_opts = { 'eps': 0.3, 'clip_min': 0, 'clip_max': 1., 'y_target': None } fgsm = FastGradientMethod(model) x_adv = fgsm.generate(x_input, **fgsm_opts) elif FLAGS.attack_type.lower() == 'bim': bim_opts = { 'eps': 0.3, 'clip_min': 0., 'clip_max': 1., 'y_target': None } bim = BasicIterativeMethod(model) x_adv = bim.generate(x_input, **bim_opts) elif FLAGS.attack_type.lower() == 'mim': mim_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.} mim = MomentumIterativeMethod(model) x_adv = mim.generate(x_input, **mim_opts) elif FLAGS.attack_type.lower() == 'pgd': pgd_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.} pgd = MadryEtAl(model) x_adv = pgd.generate(x_input, **pgd_opts) elif FLAGS.attack_type.lower() == 'jsma': jsma_opts = { 'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None } jsma = SaliencyMapMethod(model) x_adv = jsma.generate(x_input, **jsma_opts) elif FLAGS.attack_type.lower() == 'lbfgs': lbfgs_opts = {'y_target': target} lbfgs = LBFGS(model) x_adv = lbfgs.generate(x_input, **lbfgs_opts) else: raise ValueError('Invalid attack type specified: {}'.format( FLAGS.attack_type)) start_time, batch_time, num_processed = time.time(), time.time(), 0 for filenames, images in load_images(FLAGS.input_dir, batch_shape): adv_images = sess.run(x_adv, feed_dict={x_input: images}) save_images(adv_images, filenames, FLAGS.output_dir) if FLAGS.show_predictions: preds = sess.run(model(np.float32(images))) probs = np.amax(preds, axis=1) classes = np.argmax(preds, axis=1) adv_preds = sess.run(model(adv_images)) adv_probs = np.amax(adv_preds, axis=1) adv_classes = np.argmax(adv_preds, axis=1) for i, _ in enumerate(filenames): print('\nOriginal: {:.2f}% ({})\nAdversarial: {:.2f}% ({})'.format( probs[i]*100, labels[str(classes[i])], adv_probs[i]*100, labels[str(adv_classes[i])])) time_delta = time.time() - batch_time batch_time = time.time() num_processed += len(filenames) print('[SPEED ESTIMATION] BatchRate={:.4f} Hz; AverageRate={:.4f} Hz'.format( (len(filenames) / time_delta * 1.0), ((num_processed * 1.0) / (batch_time - start_time))))
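The per-image printout above already compares clean and adversarial predictions; aggregating them into a fooling rate is a one-liner. A small helper sketch (the helper name and placement are illustrative, not part of the original script):

import numpy as np

def fooling_rate(clean_classes, adv_classes):
    """Fraction of inputs whose predicted class changed under the attack."""
    clean_classes = np.asarray(clean_classes)
    adv_classes = np.asarray(adv_classes)
    return float(np.mean(clean_classes != adv_classes))

# Usage with the per-batch arrays computed above:
#   rate = fooling_rate(classes, adv_classes)
print(fooling_rate([1, 2, 3], [1, 9, 8]))  # 2 of 3 labels flipped -> 0.666...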
def mnist_attack(train_start=0, train_end=60000, test_start=0, test_end=10000, viz_enabled=True, nb_epochs=6, batch_size=128, nb_filters=64, nb_samples=10, learning_rate=0.001, eps=0.3, attack=0, attack_iterations=100, model_path=None, targeted=False, binary=False, scale=False, rand=False, debug=None, test=False, data_dir=None, delay=0, adv=0, nb_iter=40): """ MNIST tutorial for generic attack :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param viz_enabled: (boolean) activate plots of adversarial examples :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param nb_classes: number of output classes :param nb_samples: number of test inputs to attack :param learning_rate: learning rate for training :param model_path: path to the model file :param targeted: should we run a targeted attack? or untargeted? :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # MNIST-specific dimensions img_rows = 28 img_cols = 28 channels = 1 nb_classes = 10 # Set TF random seed to improve reproducibility tf.set_random_seed(1237) # Create TF session sess = tf.Session() print("Created TensorFlow session.") if debug: set_log_level(logging.DEBUG) else: set_log_level(logging.WARNING) # for running on sharcnet # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(datadir=data_dir, train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) phase = tf.placeholder(tf.bool, name='phase') # for attempting to break unscaled network. 
logits_scalar = tf.placeholder_with_default(INIT_T, shape=(), name="logits_temperature") save = False train_from_scratch = False if model_path is not None: if os.path.exists(model_path): # check for existing model in immediate subfolder if any(f.endswith('.meta') for f in os.listdir(model_path)): binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings( model_path) train_from_scratch = False else: model_path = build_model_save_path(model_path, binary, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay, scale) print(model_path) save = True train_from_scratch = True else: train_from_scratch = True # train from scratch, but don't save since no path given # Define TF model graph if binary: print('binary=True') if scale: print('scale=True') if rand: print('rand=True') from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn model = make_scaled_binary_rand_cnn( phase, logits_scalar, 'binsc_', input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn model = make_scaled_binary_cnn(phase, logits_scalar, 'binsc_', input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn model = make_basic_binary_cnn(phase, logits_scalar, 'bin_', nb_filters=nb_filters) else: if rand: print('rand=True') from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn model = make_scaled_rand_cnn(phase, logits_scalar, 'fp_rand', nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_basic_cnn model = make_basic_cnn(phase, logits_scalar, 'fp_', nb_filters=nb_filters) preds = model(x, reuse=False) # * logits_scalar print("Defined TensorFlow model graph.") ########################################################################### # Training the model using TensorFlow ########################################################################### rng = np.random.RandomState([2017, 8, 30]) # Train an MNIST model train_params = { 'binary': binary, 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'loss_name': 'train loss', 'filename': 'model', 'reuse_global_step': False, 'train_scope': 'train', 'is_training': True } if adv != 0: if adv == ADVERSARIAL_TRAINING_MADRYETAL: from cleverhans.attacks import MadryEtAl train_attack_params = { 'eps': MAX_EPS, 'eps_iter': 0.01, 'nb_iter': nb_iter } train_attacker = MadryEtAl(model, sess=sess) elif adv == ADVERSARIAL_TRAINING_FGSM: from cleverhans.attacks import FastGradientMethod stddev = int(np.ceil((MAX_EPS * 255) // 2)) train_attack_params = { 'eps': tf.abs( tf.truncated_normal(shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev)) } train_attacker = FastGradientMethod(model, back='tf', sess=sess) # create the adversarial trainer train_attack_params.update({'clip_min': 0., 'clip_max': 1.}) adv_x_train = train_attacker.generate(x, phase, **train_attack_params) preds_adv_train = model.get_probs(adv_x_train) eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.} adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params) preds_adv_eval = model.get_probs(adv_x_eval) # * logits_scalar def evaluate(): # Evaluate the accuracy of the MNIST model on clean test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - 
test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) if adv != 0: # Accuracy of the adversarially trained model on adversarial # examples acc = model_eval(sess, x, y, preds_adv_eval, X_test, Y_test, phase=phase, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % acc) acc = model_eval(sess, x, y, preds_adv_eval, X_test, Y_test, phase=phase, args=eval_params, feed={logits_scalar: ATTACK_T}) print('Test accuracy on adversarial examples (scaled): %0.4f' % acc) if train_from_scratch: if save: train_params.update({'log_dir': model_path}) if adv and delay > 0: train_params.update({'nb_epochs': delay}) # do clean training for 'nb_epochs' or 'delay' epochs if test: model_train(sess, x, y, preds, X_train, Y_train, phase=phase, evaluate=evaluate, args=train_params, save=save, rng=rng) else: model_train(sess, x, y, preds, X_train, Y_train, phase=phase, args=train_params, save=save, rng=rng) # optionally do additional adversarial training if adv: print("Adversarial training for %d epochs" % (nb_epochs - delay)) train_params.update({'nb_epochs': nb_epochs - delay}) train_params.update({'reuse_global_step': True}) if test: model_train(sess, x, y, preds, X_train, Y_train, phase=phase, predictions_adv=preds_adv_train, evaluate=evaluate, args=train_params, save=save, rng=rng) else: model_train(sess, x, y, preds, X_train, Y_train, phase=phase, predictions_adv=preds_adv_train, args=train_params, save=save, rng=rng) else: tf_model_load(sess, model_path) print('Restored model from %s' % model_path) evaluate() # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase, feed={phase: False}, args=eval_params) assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) report.clean_train_clean_eval = accuracy ########################################################################### # Build dataset ########################################################################### if viz_enabled: assert nb_samples == nb_classes idxs = [ np.where(np.argmax(Y_test, axis=1) == i)[0][0] for i in range(nb_classes) ] viz_rows = nb_classes if targeted else 2 # Initialize our array for grid visualization grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels) grid_viz_data = np.zeros(grid_shape, dtype='f') if targeted: from cleverhans.utils import build_targeted_dataset if viz_enabled: from cleverhans.utils import grid_visual adv_inputs, true_labels, adv_ys = build_targeted_dataset( X_test, Y_test, idxs, nb_classes, img_rows, img_cols, channels) else: adv_inputs, true_labels, adv_ys = build_targeted_dataset( X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows, img_cols, channels) else: if viz_enabled: from cleverhans.utils import pair_visual adv_inputs = X_test[idxs] else: adv_inputs = X_test[:nb_samples] ########################################################################### # Craft adversarial examples using generic approach ########################################################################### if targeted: att_batch_size = np.clip(nb_samples * (nb_classes - 1), a_max=MAX_BATCH_SIZE, a_min=1) nb_adv_per_sample = nb_classes - 1 yname = "y_target" else: att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE) nb_adv_per_sample = 1 adv_ys = None yname = "y" print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) + ' adversarial examples') 
print("This could take some time ...") if attack == ATTACK_CARLINI_WAGNER_L2: print('Attack: CarliniWagnerL2') from cleverhans.attacks import CarliniWagnerL2 attacker = CarliniWagnerL2(model, back='tf', sess=sess) attack_params = { 'binary_search_steps': 1, 'max_iterations': attack_iterations, 'learning_rate': 0.1, 'batch_size': att_batch_size, 'initial_const': 10, } elif attack == ATTACK_JSMA: print('Attack: SaliencyMapMethod') from cleverhans.attacks import SaliencyMapMethod attacker = SaliencyMapMethod(model, back='tf', sess=sess) attack_params = {'theta': 1., 'gamma': 0.1} elif attack == ATTACK_FGSM: print('Attack: FastGradientMethod') from cleverhans.attacks import FastGradientMethod attacker = FastGradientMethod(model, back='tf', sess=sess) attack_params = {'eps': eps} elif attack == ATTACK_MADRYETAL: print('Attack: MadryEtAl') from cleverhans.attacks import MadryEtAl attacker = MadryEtAl(model, back='tf', sess=sess) attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter} elif attack == ATTACK_BASICITER: print('Attack: BasicIterativeMethod') from cleverhans.attacks import BasicIterativeMethod attacker = BasicIterativeMethod(model, back='tf', sess=sess) attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter} else: print("Attack undefined") sys.exit(1) attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.}) adv_np = attacker.generate_np(adv_inputs, phase, **attack_params) ''' name = 'm_fgsm_eps%s_n%s.npy' % (eps, nb_samples) fpath = os.path.join( '/scratch/gallowaa/mnist/adversarial_examples/cleverhans/', name) np.savez(fpath, x=adv_np, y=Y_test[:nb_samples]) ''' ''' adv_x = attacker.generate(x, phase, **attack_params) adv_np, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={ phase: False}, args=eval_params) ''' eval_params = {'batch_size': att_batch_size} if targeted: print("Evaluating targeted results") adv_accuracy = model_eval(sess, x, y, preds, adv_np, true_labels, phase=phase, args=eval_params) else: print("Evaluating untargeted results") if viz_enabled: adv_accuracy = model_eval(sess, x, y, preds, adv_np, Y_test[idxs], phase=phase, args=eval_params) else: adv_accuracy = model_eval(sess, x, y, preds, adv_np, Y_test[:nb_samples], phase=phase, args=eval_params) if viz_enabled: n = nb_classes - 1 for i in range(nb_classes): if targeted: for j in range(nb_classes): if i != j: if j != 0 and i != n: grid_viz_data[i, j] = adv_np[j * n + i] if j == 0 and i > 0 or i == n and j > 0: grid_viz_data[i, j] = adv_np[j * n + i - 1] else: grid_viz_data[i, j] = adv_inputs[j * n] else: grid_viz_data[j, 0] = adv_inputs[j] grid_viz_data[j, 1] = adv_np[j] print(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy)) report.clean_train_adv_eval = 1. - adv_accuracy # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean( np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Compute number of modified features (L_0 norm) nb_changed = np.where(adv_np != adv_inputs)[0].shape[0] percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0]) # Compute the average distortion introduced by the algorithm print('Avg. 
rate of perturbed features {0:.4f}'.format(percent_perturb)) # Friendly output for pasting into spreadsheet print('{0:.4f}'.format(accuracy)) print('{0:.4f}'.format(adv_accuracy)) print('{0:.4f}'.format(percent_perturbed)) print('{0:.4f}'.format(percent_perturb)) # Close TF session sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: import matplotlib.pyplot as plt _ = grid_visual(grid_viz_data) return report
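A minimal driver for mnist_attack, assuming the module-level constants it references (ATTACK_MADRYETAL, MAX_BATCH_SIZE, MAX_EPS, INIT_T) are defined as in this file; the data path is a placeholder:

# Hypothetical invocation of mnist_attack; data_dir is a placeholder path.
report = mnist_attack(nb_epochs=6,
                      batch_size=128,
                      nb_samples=10,
                      eps=0.3,
                      nb_iter=40,
                      attack=ATTACK_MADRYETAL,  # module-level constant used above
                      targeted=False,
                      viz_enabled=False,
                      data_dir='/tmp/mnist',
                      model_path=None)
print('clean accuracy: %.4f' % report.clean_train_clean_eval)
print('attack success rate: %.4f' % report.clean_train_adv_eval)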
def train(fgsm_eps, _dp_epsilon, _attack_norm_bound, log_filename, ratio): FLAGS = None #ratio = 16 #target_eps = [0.125,0.25,0.5,1,2,4,8] #target_eps = [0.25 + 0.25*ratio] target_eps = [0.2 + 0.2 * ratio] #print(target_eps[0]) #fgsm_eps = 0.1 dp_epsilon = _dp_epsilon image_size = 28 _log_filename = log_filename + str(target_eps[0]) + '_fgsm_' + str( fgsm_eps) + '_dpeps_' + str(dp_epsilon) + '_attack_norm_bound_' + str( _attack_norm_bound) + '.txt' clip_bound = 0.001 # 'the clip bound of the gradients' clip_bound_2 = 1 / 1.5 # 'the clip bound for r_kM' small_num = 1e-5 # 'a small number' large_num = 1e5 # a large number' num_images = 50000 # 'number of images N' batch_size = 125 # 'batch_size L' sample_rate = batch_size / 50000 # 'sample rate q = L / N' # 900 epochs num_steps = 1800000 # 'number of steps T = E * N / L = E / q' num_epoch = 24 # 'number of epoches E' sigma = 5 # 'sigma' delta = 1e-5 # 'delta' lambd = 1e3 # 'exponential distribution parameter' iterative_clip_step = 2 # 'iterative_clip_step' clip = 1 # 'whether to clip the gradient' noise = 0 # 'whether to add noise' redistribute = 0 # 'whether to redistribute the noise' D = 50000 sess = tf.InteractiveSession() # Create the model x = tf.placeholder(tf.float32, [None, 784]) y_ = tf.placeholder(tf.float32, [None, 10]) keep_prob = tf.placeholder(tf.float32) W_conv1 = weight_variable([5, 5, 1, 32]) b_conv1 = bias_variable([32]) W_conv2 = weight_variable([5, 5, 32, 64]) b_conv2 = bias_variable([64]) W_fc1 = weight_variable([7 * 7 * 64, 25]) b_fc1 = bias_variable([25]) W_fc2 = weight_variable([25, 10]) b_fc2 = bias_variable([10]) def inference(x, dp_mult): x_image = tf.reshape(x, [-1, 28, 28, 1]) h_conv1 = tf.nn.relu((conv2d(x_image, W_conv1) + b_conv1) + dp_mult) h_pool1 = max_pool_2x2(h_conv1) h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) h_pool2 = max_pool_2x2(h_conv2) h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 return y_conv, h_conv1 def inference_prob(x): logits, _ = inference(x, 0) y_prob = tf.nn.softmax(logits) return y_prob shape = W_conv1.get_shape().as_list() w_t = tf.reshape(W_conv1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivityW = tf.reduce_max(sing_vals) dp_delta = 0.05 attack_norm_bound = _attack_norm_bound dp_mult = attack_norm_bound * math.sqrt( 2 * math.log(1.25 / dp_delta)) / dp_epsilon noise = tf.placeholder(tf.float32, [None, 28, 28, 32]) #y_conv, h_conv1 = inference(x, dp_mult * noise) y_conv, h_conv1 = inference(x, attack_norm_bound * noise) softmax_y = tf.nn.softmax(y_conv) # Define loss and optimizer priv_accountant = accountant.GaussianMomentsAccountant(D) privacy_accum_op = priv_accountant.accumulate_privacy_spending( [None, None], sigma, batch_size) # sess.run(tf.initialize_all_variables()) sess.run(tf.global_variables_initializer()) cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)) #train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy); #train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy) # noise redistribution # grad, = tf.gradients(cross_entropy, h_conv1) normalized_grad = tf.sign(grad) normalized_grad = tf.stop_gradient(normalized_grad) normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0))) #print(normalized_grad_r) sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False) #print(sum_r) 
normalized_grad_r = 256 * 32 * normalized_grad_r / sum_r print(normalized_grad_r) shape_grad = normalized_grad_r.get_shape().as_list() grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]]) g = tf.transpose(grad_t) sing_g_vals = tf.svd(g, compute_uv=False) sensitivity_2 = tf.reduce_max(sing_g_vals) ######################## opt = GradientDescentOptimizer(learning_rate=1e-1) # compute gradient gw_W1 = tf.gradients(cross_entropy, W_conv1)[0] # gradient of W1 gb1 = tf.gradients(cross_entropy, b_conv1)[0] # gradient of b1 gw_W2 = tf.gradients(cross_entropy, W_conv2)[0] # gradient of W2 gb2 = tf.gradients(cross_entropy, b_conv2)[0] # gradient of b2 gw_Wf1 = tf.gradients(cross_entropy, W_fc1)[0] # gradient of W_fc1 gbf1 = tf.gradients(cross_entropy, b_fc1)[0] # gradient of b_fc1 gw_Wf2 = tf.gradients(cross_entropy, W_fc2)[0] # gradient of W_fc2 gbf2 = tf.gradients(cross_entropy, b_fc2)[0] # gradient of b_fc2 # clip gradient gw_W1 = tf.clip_by_norm(gw_W1, clip_bound) gw_W2 = tf.clip_by_norm(gw_W2, clip_bound) gw_Wf1 = tf.clip_by_norm(gw_Wf1, clip_bound) gw_Wf2 = tf.clip_by_norm(gw_Wf2, clip_bound) # sigma = FLAGS.sigma # when comp_eps(lmbda,q,sigma,T,delta)==epsilon # sensitivity = 2 * FLAGS.clip_bound #adjacency matrix with one tuple different sensitivity = clip_bound # adjacency matrix with one more tuple gw_W1 += tf.random_normal(shape=tf.shape(gw_W1), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb1 += tf.random_normal(shape=tf.shape(gb1), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_W2 += tf.random_normal(shape=tf.shape(gw_W2), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb2 += tf.random_normal(shape=tf.shape(gb2), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_Wf1 += tf.random_normal(shape=tf.shape(gw_Wf1), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gbf1 += tf.random_normal(shape=tf.shape(gbf1), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_Wf2 += tf.random_normal(shape=tf.shape(gw_Wf2), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gbf2 += tf.random_normal(shape=tf.shape(gbf2), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) train_step = opt.apply_gradients([(gw_W1, W_conv1), (gb1, b_conv1), (gw_W2, W_conv2), (gb2, b_conv2), (gw_Wf1, W_fc1), (gbf1, b_fc1), (gw_Wf2, W_fc2), (gbf2, b_fc2)]) # craft adversarial samples from x for testing #softmax_y_test = tf.nn.softmax(y_conv) #====================== attack ========================= attack_switch = { 'fgsm': True, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 'stm': False } # define cleverhans abstract models for using cleverhans attacks ch_model_logits = CallableModelWrapper(callable_fn=inference, output_layer='logits') ch_model_probs = CallableModelWrapper(callable_fn=inference_prob, output_layer='probs') # define each attack method's tensor attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=0.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) x_adv_test_ifgsm = 
ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 10, nb_iter=10, clip_min=0.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 10, nb_iter=10, decay_factor=1.0, clip_min=0.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 10, nb_iter=10, clip_min=0.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry #====================== attack ========================= #Define the correct prediction and accuracy# correct_prediction_x = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32)) correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) s = math.log(sqrt(2.0 / math.pi) * 1e+5) sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon) print(sigmaEGM) __noiseE = np.random.normal(0.0, sigmaEGM**2, 28 * 28 * 32).astype(np.float32) __noiseE = np.reshape(__noiseE, [-1, 28, 28, 32]) start_time = time.time() logfile = open(_log_filename, 'w') last_eval_time = -1 accum_time = 0 accum_epoch = 0 max_benign_acc = -1 max_adv_acc_dict = {} test_size = len(mnist.test.images) print("Computing The Noise Redistribution Vector") for i in range(4000): batch = mnist.train.next_batch(batch_size) sess.run([train_step], feed_dict={ x: batch[0], y_: batch[1], keep_prob: 0.5, noise: __noiseE * 0 }) batch = mnist.train.next_batch(batch_size * 10) grad_redis = sess.run([normalized_grad_r], feed_dict={ x: batch[0], y_: batch[1], keep_prob: 1.0, noise: __noiseE * 0 }) #print(grad_redis) _sensitivity_2 = sess.run([sensitivity_2], feed_dict={ x: batch[0], y_: batch[1], keep_prob: 1.0, noise: __noiseE * 0 }) #print(_sensitivity_2) _sensitivityW = sess.run(sensitivityW) #print(_sensitivityW) Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0]) #print(Delta_redis) sigmaHGM = sqrt(2.0) * Delta_redis * (sqrt(s) + sqrt(s + dp_epsilon)) / ( 2.0 * dp_epsilon) #print(sigmaHGM) __noiseH = np.random.normal(0.0, sigmaHGM**2, 28 * 28 * 32).astype(np.float32) __noiseH = np.reshape(__noiseH, [-1, 28, 28, 32]) * grad_redis sess.run(tf.global_variables_initializer()) print("Training") for i in range(num_steps): batch = mnist.train.next_batch(batch_size) sess.run( [train_step], feed_dict={ x: batch[0], y_: batch[1], keep_prob: 0.5, noise: (__noiseE + __noiseH) / 2 }) sess.run([privacy_accum_op]) spent_eps_deltas = priv_accountant.get_privacy_spent( sess, target_eps=target_eps) if i % 1000 == 0: print(i, spent_eps_deltas) _break = False for _eps, _delta in spent_eps_deltas: if _delta >= delta: _break = True break if _break == True: break print("Testing") benign_acc = accuracy_x.eval( feed_dict={ x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0, noise: (__noiseE + __noiseH) / 2 }) ### PixelDP Robustness ### adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in adv_acc_dict: 
adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={ x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0 }) #grad_redis = sess.run([normalized_grad_r], feed_dict={x: adv_images_dict, y_: mnist.test.labels, keep_prob: 1.0, noise:__noise}) ### Robustness ### predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = softmax_y.eval( feed_dict={ x: adv_images_dict, keep_prob: 1.0, noise: (__noiseE + __noiseH) / 2 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 2000): if n_draws % 1000 == 0: print(n_draws) _noiseE = np.random.normal(0.0, sigmaEGM**2, 28 * 28 * 32).astype(np.float32) _noiseE = np.reshape(_noiseE, [-1, 28, 28, 32]) _noise = np.random.normal(0.0, sigmaHGM**2, 28 * 28 * 32).astype(np.float32) _noise = np.reshape(_noise, [-1, 28, 28, 32]) * grad_redis for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = softmax_y.eval( feed_dict={ x: adv_images_dict, keep_prob: 1.0, noise: (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_size): is_correct.append( np.argmax(mnist.test.labels[j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustnessGGaussian.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=1e-5, dp_mechanism='gaussian') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum(is_robust) * 1.0 / test_size print(" {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])) ############################## log_str = "step: {}\t target_epsilon: {}\t dp_epsilon: {:.1f}\t attack_norm_bound: {:.1f}\t benign_acc: {:.4f}\t".format( i, target_eps, dp_epsilon, attack_norm_bound, benign_acc) for atk in attack_switch.keys(): if attack_switch[atk]: log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') ############################## duration = time.time() - start_time logfile.write(str(duration) + '\n') logfile.flush() logfile.close()
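The dp_mult used in train() is the analytic Gaussian-mechanism noise multiplier, sigma = sensitivity * sqrt(2 ln(1.25/delta)) / epsilon. A short standalone check of that formula with the same constants (the function name is illustrative):

import math

def gaussian_mechanism_sigma(l2_sensitivity, epsilon, delta):
    # Standard Gaussian mechanism calibration (Dwork & Roth, Thm. A.1):
    # sigma >= sensitivity * sqrt(2 ln(1.25 / delta)) / epsilon
    return l2_sensitivity * math.sqrt(2.0 * math.log(1.25 / delta)) / epsilon

# With attack_norm_bound = 1.0 and dp_delta = 0.05, as in train():
print(gaussian_mechanism_sigma(1.0, 1.0, 0.05))  # ~2.537 for dp_epsilon = 1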
def attack_batch(model, in_im, net_name, attack_name, im_list, gt_labels, sample_size, batch_size): logging.basicConfig(filename='Logs/'+net_name+"_"+attack_name+'.log', level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s') config = tf.ConfigProto(device_count = {'GPU': 2}) imgs = open(im_list).readlines() # [::10] gt_labels = open(gt_labels).readlines() # [::10] top_1 = 0;top_1_real = 0;fool_rate = 0 isotropic, size = get_params(net_name) imageModel = CallableModelWrapper(model, 'logits') with tf.Session(config=config) as sess: if attack_name=='fgsm': attack = FastGradientMethod(imageModel, back='tf') adv_x = attack.generate(in_im,eps=8,clip_min=-124, clip_max=155) if attack_name=='ifgsm': attack = BasicIterativeMethod(imageModel, back='tf') adv_x = attack.generate(in_im,eps=8,eps_iter=1,nb_iter=12,clip_min=-124, clip_max=155) if attack_name=='cw2': attack = CarliniWagnerL2(imageModel, back='tf') adv_x = attack.generate(in_im,clip_min=-124, clip_max=155) if attack_name=='jsma': attack = SaliencyMapMethod(imageModel, back='tf') adv_x = attack.generate(in_im) if attack_name=='pgd': attack = MadryEtAl(imageModel, back='tf') adv_x = attack.generate(in_im,eps=8,eps_iter=1,nb_iter=12,clip_min=-124, clip_max=155) if attack_name=='deepfool': attack = DeepFool(imageModel, back='tf', sess=sess) # DeepFool needs the session at construction time adv_x = attack.generate(in_im, clip_min=-124, clip_max=155) sess.run(tf.global_variables_initializer()) img_loader = loader_func(net_name, sess, isotropic, size) batch_im = np.zeros((batch_size, size, size, 3)) for i in range(sample_size // batch_size): lim = min(batch_size, len(imgs)-i*batch_size) for j in range(lim): im = img_loader(imgs[i*batch_size+j].strip()) batch_im[j] = np.copy(im) gt = np.array([int(gt_labels[i*batch_size+j].strip()) for j in range(lim)]) adv_x_np=adv_x.eval(feed_dict={in_im: batch_im}) # Calculate the network probabilities y_adv_prob=tf.nn.softmax(model(in_im), name="yadv").eval(feed_dict={in_im: adv_x_np}); y_adv = np.argmax(y_adv_prob,1) y_true_prob=tf.nn.softmax(model(in_im), name="ypred").eval(feed_dict={in_im: batch_im}); y_true = np.argmax(y_true_prob,1) # Calculate the top-1, top-1-true accuracies and fooling rate top_1 += np.sum(y_adv == gt); top_1_real += np.sum(y_true == gt) fool_rate += np.sum(y_true != y_adv ) if i != 0 and i % 2 == 0: logging.info("batch: {} ==================================================================".format(i)) logging.info("fooling rate {}".format((fool_rate)/float((i+1)*batch_size)*100)) logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") logging.info('Real Top-1 Accuracy = {}'.format( top_1_real/float(sample_size)*100)) logging.info('Top-1 Accuracy = {}'.format((top_1/float(sample_size)*100))) logging.info('Top-1 Fooling Rate = {}'.format(fool_rate/float(sample_size)*100)) logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
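One caveat in attack_batch: on a short final batch (lim < batch_size), batch_im still holds stale rows from the previous batch, so the full-batch predictions should be trimmed before scoring. A hedged sketch of that guard (the helper name is illustrative, not from the original):

import numpy as np

def score_batch(y_adv, y_true, gt, lim):
    """Score only the first `lim` valid rows of a possibly padded batch."""
    correct_adv = int(np.sum(y_adv[:lim] == gt[:lim]))
    correct_clean = int(np.sum(y_true[:lim] == gt[:lim]))
    fooled = int(np.sum(y_true[:lim] != y_adv[:lim]))
    return correct_adv, correct_clean, fooled

# e.g. inside the loop above:
#   d_top1, d_top1_real, d_fool = score_batch(y_adv, y_true, gt, lim)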
# # cw_params = {'binary_search_steps': 1, # # 'max_iterations': attack_iterations, # # 'learning_rate': 0.1, # # 'batch_size': n_attack, # # 'initial_const': 10} # cw = CarliniWagnerL2(wrap_clf, back='tf', sess=sess) # adv = cw.generate_np(X_atk, **cw_params) from cleverhans.attacks import MadryEtAl pgd_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 40, 'clip_min': 0., 'clip_max': 1., 'rand_init': True} pgd = MadryEtAl(wrap_clf, sess=sess) adv = pgd.generate_np(X_atk, **pgd_params) # adv_x = cw.generate(x, **cw_params) # preds_adv = clf(adv_x) # acc = model_eval(sess, x, y, preds_adv, X_test[:n_attack], # y_test[:n_attack], args={'batch_size': n_attack}) # print('Test accuracy on CW adversarial examples: %0.4f\n' % acc) pred = clf.predict(adv) # print(np.sum(np.argmax(pred, axis=1) != np.argmax(y_test[:n_attack], axis=1))) # pred_orig = clf.predict(X_atk) # print(np.sum(np.argmax(pred, axis=1) != np.argmax(pred_orig, axis=1))) print(np.sum(np.argmax(pred, axis=1) == np.argmax(y_target, axis=1))) # Save some images
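The final print above counts targeted hits; expressed as a rate, the same check looks like this (the helper name is illustrative, not from the original):

import numpy as np

def targeted_success_rate(pred_probs, y_target):
    """Fraction of adversarial examples classified as their target label."""
    hits = np.argmax(pred_probs, axis=1) == np.argmax(y_target, axis=1)
    return float(np.mean(hits))

# e.g. with the arrays above:
#   print('targeted success: %.2f%%' % (100 * targeted_success_rate(pred, y_target)))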
def prep_bbox(sess, logits_scalar, x, y, X_train, Y_train, X_test, Y_test, img_rows, img_cols, channels, nb_epochs, batch_size, learning_rate, rng, phase=None, binary=False, scale=False, nb_filters=64, model_path=None, adv=0, delay=0, eps=0.3): """ Define and train a model that simulates the "remote" black-box oracle described in the original paper. :param sess: the TF session :param x: the input placeholder for MNIST :param y: the output placeholder for MNIST :param X_train: the training data for the oracle :param Y_train: the training labels for the oracle :param X_test: the testing data for the oracle :param Y_test: the testing labels for the oracle :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param rng: numpy.random.RandomState :return: """ # Define TF model graph (for the black-box model) save = False train_from_scratch = False if model_path is not None: if os.path.exists(model_path): # check for existing model in immediate subfolder if any(f.endswith('.meta') for f in os.listdir(model_path)): binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings( model_path) train_from_scratch = False else: model_path = build_model_save_path(model_path, binary, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay, scale) print(model_path) save = True train_from_scratch = True else: train_from_scratch = True # train from scratch, but don't save since no path given if binary: if scale: #from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn # model = make_scaled_binary_cnn(phase, 'bb_binsc_', input_shape=( from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn model = make_scaled_binary_rand_cnn(phase, logits_scalar, 'bb_binsc_', input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn model = make_basic_binary_cnn(phase, logits_scalar, 'bb_bin_', input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters) else: from cleverhans_tutorials.tutorial_models import make_basic_cnn model = make_basic_cnn(phase, logits_scalar, 'bb_fp_', input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters) preds = model(x, reuse=False) print("Defined TensorFlow model graph.") def evaluate(): # Print out the accuracy on legitimate data eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params) print('Test accuracy of black-box on legitimate test ' 'examples: %.4f' % acc) return acc # Train an MNIST model train_params = { 'binary': binary, 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'loss_name': 'bb train loss', 'filename': 'bb_model', 'train_scope': 'bb_model', 'reuse_global_step': False, 'is_training': True } if adv != 0: if adv == ADVERSARIAL_TRAINING_MADRYETAL: from cleverhans.attacks import MadryEtAl nb_iter = 20 train_attack_params = { 'eps': MAX_EPS, 'eps_iter': 0.01, 'nb_iter': nb_iter } train_attacker = MadryEtAl(model, sess=sess) elif adv == ADVERSARIAL_TRAINING_FGSM: from cleverhans.attacks import FastGradientMethod train_attack_params = {'eps': eps} train_attacker = FastGradientMethod(model, back='tf', sess=sess) # create the adversarial trainer train_attack_params.update({'clip_min': 0., 'clip_max': 1.}) adv_x_train = train_attacker.generate(x, phase, **train_attack_params) preds_adv = model.get_probs(adv_x_train) if train_from_scratch: if save: 
train_params.update({'log_dir': model_path}) if adv and delay > 0: train_params.update({'nb_epochs': delay}) # do clean training for 'nb_epochs' or 'delay' epochs model_train(sess, x, y, preds, X_train, Y_train, phase=phase, evaluate=evaluate, args=train_params, save=save, rng=rng) # optionally do additional adversarial training if adv: print("Adversarial training for %d epochs" % (nb_epochs - delay)) train_params.update({'nb_epochs': nb_epochs - delay}) train_params.update({'reuse_global_step': True}) model_train(sess, x, y, preds, X_train, Y_train, phase=phase, predictions_adv=preds_adv, evaluate=evaluate, args=train_params, save=save, rng=rng) else: tf_model_load(sess, model_path) print('Restored model from %s' % model_path) accuracy = evaluate() return model, preds, accuracy, model_path
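For context, a hypothetical call to prep_bbox mirroring the MNIST setup used elsewhere in this file (the session, placeholders, and data arrays are assumed to exist as in the surrounding snippets):

# Sketch only: sess, logits_scalar, x, y, phase and the MNIST arrays come
# from the calling script, as in mnist_attack above.
bbox_model, bbox_preds, bbox_acc, bbox_path = prep_bbox(
    sess, logits_scalar, x, y, X_train, Y_train, X_test, Y_test,
    img_rows=28, img_cols=28, channels=1,
    nb_epochs=6, batch_size=128, learning_rate=0.001,
    rng=np.random.RandomState([2017, 8, 30]),
    phase=phase, nb_filters=64, model_path=None, adv=0)
print('black-box test accuracy: %.4f' % bbox_acc)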
def PDP_resnet_with_pretrain_adv(TIN_data, resnet_params, train_params, params_to_save): # dict for encoding layer variables and output layer variables pre_define_vars = {} # list of variables to train train_vars = [] pretrain_vars = [] with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = tf.Variable(0, trainable=False) # Parameters Declarification ###################################### # encoding (pretrain) layer variables with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope: kernel1 = tf.get_variable('kernel1', shape=[train_params.enc_kernel_size, train_params.enc_kernel_size, 3, train_params.enc_filters], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer_conv2d()) biases1 = tf.get_variable('biases1', shape=[train_params.enc_filters], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['kernel1'] = kernel1 pre_define_vars['biases1'] = biases1 train_vars.append(kernel1) train_vars.append(biases1) pretrain_vars.append(kernel1) pretrain_vars.append(biases1) shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2*train_params.Delta2/(train_params.effective_batch_size * sensitivity) print('gamma: {}'.format(gamma)) # output layer variables with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope: stdv = 1.0 / math.sqrt(train_params.hk) final_w = tf.get_variable('kernel', shape=[train_params.hk, train_params.num_classes], dtype=tf.float32, initializer=tf.random_uniform_initializer(-stdv, stdv)) final_b = tf.get_variable('bias', shape=[train_params.num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['final_w'] = final_w pre_define_vars['final_b'] = final_b train_vars.append(final_w) train_vars.append(final_b) ###################################### # Build a Graph that computes the logits predictions from the inputs ###################################### # input placeholders x_sb = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb') # input is the bunch of n_batchs x_sb_adv = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb_adv') x_test = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_test') y_sb = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb') # input is the bunch of n_batchs (super batch) y_sb_adv = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb_adv') y_test = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_test') FM_h = tf.placeholder(tf.float32, [None, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters], name='FM_h') # one time noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='noise') # one time adv_noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='adv_noise') # one time learning_rate = tf.placeholder(tf.float32, shape=(), name='learning_rate') keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob') # list of grads for each GPU tower_pretrain_grads = [] tower_train_grads = [] all_train_loss = [] # optimizers pretrain_opt = tf.train.AdamOptimizer(learning_rate) train_opt = tf.train.AdamOptimizer(learning_rate) # model and loss on one GPU with tf.device('/gpu:{}'.format(GPU_IDX[0])): # setup encoding layer 
training with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope: Enc_Layer2 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(x_sb_adv)[0], Delta=train_params.Delta2, epsilon=train_params.epsilon2, batch_size=None, learning_rate=None, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h) Enc_Layer3 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2(xShape=tf.shape(x_sb)[0], Delta=train_params.Delta2, epsilon=train_params.epsilon2, batch_size=None, learning_rate=None, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h) pretrain_cost = tf.reduce_mean(pretrain_adv + pretrain_benign) print_var('pretrain_cost', pretrain_cost) # use standard loss first y_logits = inference(x_sb + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax = tf.nn.softmax(y_logits) y_logits_adv = inference(x_sb_adv + adv_noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax_adv = tf.nn.softmax(y_logits_adv) # taylor exp # TODO: use noise here perturbW = train_params.perturbFM * final_w # train_loss = TaylorExp_no_noise(y_softmax, y_sb, y_softmax_adv, y_sb_adv, # train_params.effective_batch_size, train_params.alpha) train_loss = TaylorExp(y_softmax, y_sb, y_softmax_adv, y_sb_adv, train_params.effective_batch_size, train_params.alpha, perturbW) print_var('train_loss', train_loss) all_train_loss.append(train_loss) # split testing in each gpu x_sb_tests = tf.split(x_sb, N_ALL_GPUS, axis=0) y_softmax_test_list = [] for gpu in range(N_ALL_GPUS): with tf.device('/gpu:{}'.format(gpu)): # testing graph now in each gpu y_logits_test = test_inference(x_sb_tests[gpu] + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax_test_list.append(tf.nn.softmax(y_logits_test)) y_softmax_test_concat = tf.concat(y_softmax_test_list, axis=0) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') all_vars = tf.global_variables() print_var_list('all vars', all_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') # add selected vars into trainable variable list # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or for var in tf.global_variables(): if 'resnet_model' in var.name and \ ('conv0' in var.name or 'fc' in var.name or 'res3' in var.name or 'res4' in var.name or 'res1' in var.name or 'res2' in var.name) and \ ('gamma' in var.name or 'beta' in var.name or 'kernel' in var.name or 'bias' in var.name): if var not in train_vars: train_vars.append(var) elif 'enc_layer' in var.name and \ ('kernel' in var.name or 'bias' in var.name): if var not in pretrain_vars: pretrain_vars.append(var) if var not in train_vars: train_vars.append(var) elif 'enc_layer' in var.name and \ ('gamma' in var.name or 'beta' in var.name): if var not in pretrain_vars: pretrain_vars.append(var) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') print_var_list('train_vars', train_vars) print_var_list('pretrain_vars', pretrain_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') # op for compute grads on one gpu with tf.device('/gpu:{}'.format(GPU_IDX[0])): # get all update_ops (updates of moving averageand std) for batch normalizations update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) print_op_list('update ops', update_ops) enc_update_ops = [op for op in update_ops if 'enc_layer' in op.name] print_op_list('enc 
layer update ops', enc_update_ops) # when the gradients are computed, update the batch_norm with tf.control_dependencies(enc_update_ops): pretrain_grads = pretrain_opt.compute_gradients(pretrain_cost, var_list=pretrain_vars) print('*********** pretrain_grads ***********') for x in pretrain_grads: print(x) print('**********************') with tf.control_dependencies(update_ops): train_grads = train_opt.compute_gradients(train_loss, var_list=train_vars) print('*********** train_grads ***********') for x in train_grads: print(x) print('**********************') avg_pretrain_grads = pretrain_grads avg_train_grads = train_grads # get averaged loss tensor for pretrain and train ops total_loss = tf.reduce_sum(tf.stack(all_train_loss)) total_pretrain_loss = tf.reduce_mean(pretrain_cost) # prepare to save gradients for large batch pretrain_grads_save = [g for g,v in pretrain_grads] # print('*********** pretrain_grads_save ***********' + str(pretrain_grads_save) + '**********************') train_grads_save = [g for g,v in train_grads] # print('*********** train_grads_save ***********' + str(train_grads_save) + '**********************') pretrain_grads_shapes = [g.shape.as_list() for g in pretrain_grads_save] train_grads_shapes = [g.shape.as_list() for g in train_grads_save] # placeholders for importing saved gradients pretrain_grads_placeholders = [] for g,v in pretrain_grads: pretrain_grads_placeholders.append(tf.placeholder(tf.float32, v.shape)) train_grads_placeholders = [] for g,v in train_grads: train_grads_placeholders.append(tf.placeholder(tf.float32, v.shape)) # construct the (grad, var) list assemble_pretrain_grads = [] for i in range(len(pretrain_vars)): assemble_pretrain_grads.append((pretrain_grads_placeholders[i], pretrain_vars[i])) assemble_train_grads = [] for i in range(len(train_grads)): assemble_train_grads.append((train_grads_placeholders[i], train_vars[i])) # apply the saved gradients pretrain_op = pretrain_opt.apply_gradients(assemble_pretrain_grads, global_step=global_step) train_op = train_opt.apply_gradients(assemble_train_grads, global_step=global_step) ###################################### # Create a saver. 
saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000) # start a session with memory growth config = tf.ConfigProto(log_device_placement=False) config.gpu_options.allow_growth=True sess = tf.Session(config=config) print("session created") # get some initial values sess.run(kernel1.initializer) _gamma = sess.run(gamma) _gamma_x = train_params.Delta2 / train_params.effective_batch_size epsilon2_update = train_params.epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x) delta_r = train_params.fgsm_eps * (train_params.image_size ** 2) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW*(train_params.enc_h_size ** 2) #dp_mult = (train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update)) / (delta_r / train_params.dp_epsilon) + \ # (2 * train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update))/(delta_h / train_params.dp_epsilon) dp_mult = (train_params.Delta2*train_params.dp_epsilon) / (train_params.effective_batch_size*epsilon2_update * (delta_h / 2 + delta_r)) # save some valus for testing params_to_save['epsilon2_update'] = epsilon2_update params_to_save['dp_mult'] = dp_mult ####################################### # ADV attacks ####################################### # split input for attacks x_attacks = tf.split(x_sb, 3, axis=0) # split it into each batch # currently only ifgsm, mim, and madry attacks are available attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False} # wrap the inference ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_output_probs, output_layer='probs', adv_noise=adv_noise, keep_prob=keep_prob, pre_define_vars=pre_define_vars, resnet_params=resnet_params, train_params=train_params) # to save the reference to the attack tensors attack_tensor_training_dict = {} attack_tensor_testing_dict = {} # placeholder for eps parameter mu_alpha = tf.placeholder(tf.float32, [1]) # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # place on specific GPU with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])): print('ifgsm GPU placement') print('/gpu:{}'.format(AUX_GPU_IDX[0])) if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_training_dict['ifgsm'] = ifgsm_obj.generate(x=x_attacks[0], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, clip_min=-1.0, clip_max=1.0) attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # MomentumIterativeMethod # place on specific GPU with tf.device('/gpu:{}'.format(AUX_GPU_IDX[1])): print('mim GPU placement') print('/gpu:{}'.format(AUX_GPU_IDX[1])) if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_training_dict['mim'] = mim_obj.generate(x=x_attacks[1], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_testing_dict['mim'] = mim_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) # MadryEtAl (Projected Grdient with random init, same as 
rand+fgsm) # place on specific GPU with tf.device('/gpu:{}'.format(AUX_GPU_IDX[2])): print('madry GPU placement') print('/gpu:{}'.format(AUX_GPU_IDX[2])) if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) attack_tensor_training_dict['madry'] = madry_obj.generate(x=x_attacks[2], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, clip_min=-1.0, clip_max=1.0) attack_tensor_testing_dict['madry'] = madry_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # combine the tensors adv_tensors_concat = tf.concat([attack_tensor_training_dict[x] for x in train_params.attacks], axis=0) ####################################### # init op print('initialize_all_variables') init = tf.initialize_all_variables() sess.run(init) # load pretrained variables of RESNET if train_params.load_weights: # first we need to load variable name convert table tgt_var_name_dict = {} with open(train_params.weight_table_path, 'r', encoding='utf-8') as inf: lines = inf.readlines() for line in lines: var_names = line.strip().split(' ') if var_names[1] == 'NONE': continue else: tgt_var_name_dict[var_names[0]] = var_names[1] # load variables dict from checkpoint pretrained_var_dict = load_pretrained_vars() # load pre-trained vars using name convert table for var in tf.global_variables(): if var.name in tgt_var_name_dict: # print('var \"{}\" found'.format(var.name)) try: var.load(pretrained_var_dict[tgt_var_name_dict[var.name]], session=sess) print('{} loaded'.format(var.name)) except: print('var {} not loaded since shape changed'.format(var.name)) else: if 'Adam' not in var.name: print('var \"{}\" NOT FOUND'.format(var.name)) else: print('Training model from scratch') ##################################### # init noise and save for testing perturbH_test = np.random.laplace(0.0, 0, train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters) perturbH_test = np.reshape(perturbH_test, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters]) params_to_save['perturbH_test'] = perturbH_test perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters) perturbFM_h = np.reshape(perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters]) params_to_save['perturbFM_h'] = perturbFM_h Noise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size) params_to_save['Noise'] = Noise Noise_test = generateIdLMNoise(train_params.image_size, 0, epsilon2_update, train_params.effective_batch_size) params_to_save['Noise_test'] = Noise_test # save params for testing with open(os.getcwd() + train_params.params_save_path, 'wb') as outf: pickle.dump(params_to_save, outf) print('params saved') #################################### print('start pretrain') start_time = time.time() lr_schedule_list = sorted(train_params.lr_schedule_pretrain.keys()) attacks_and_benign = train_params.attacks + ['benign'] # build zeros numpy arrays for accumulate grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] total_pretrain_loss_value = 0.0 step = 0 # pretrain loop while True: # if enough steps, break if step > train_params.pretrain_steps: break # 
increment the step counter here so it is not forgotten else: step += 1 # manual schedule learning rate current_epoch = step // (train_params.epoch_steps) current_lr = train_params.lr_schedule_pretrain[get_lr(current_epoch, lr_schedule_list)] # benign and adv batch super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) # get pretrain grads pretrain_grads_save_np, _pretrain_loss_value = sess.run([pretrain_grads_save, total_pretrain_loss], feed_dict={x_sb: super_batch[0], x_sb_adv: adv_super_batch[0], learning_rate: current_lr, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h}) # accumulate grads for i in range(len(accumu_pretrain_grads)): accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i] # accumulate loss values total_pretrain_loss_value = total_pretrain_loss_value + _pretrain_loss_value # use accumulated gradients to update variables if step % train_params.batch_multi == 0 and step > 0: # print('effective batch reached at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps)) # compute the average grads and build the feed dict pretrain_feed_dict = {} for i in range(len(accumu_pretrain_grads)): pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi pretrain_feed_dict[learning_rate] = current_lr # run train ops by feeding the gradients sess.run(pretrain_op, feed_dict=pretrain_feed_dict) # get loss value avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi # reset the average grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] total_pretrain_loss_value = 0.0 # print loss if step % (1*train_params.epoch_steps) == 0 and step >= (1*train_params.epoch_steps): print('pretrain report at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps)) dt = time.time() - start_time avg_epoch_time = dt / (step / train_params.epoch_steps) print('epoch: {:.4f}, avg epoch time: {:.4f}, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True) print('pretrain_loss: {:.6f}'.format(avg_pretrain_loss_value)) #################################### print('start train') start_time = time.time() lr_schedule_list = sorted(train_params.lr_schedule.keys()) # train whole model # build zeros numpy arrays for accumulate grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes] total_pretrain_loss_value = 0.0 total_train_loss_value = 0.0 step = 0 # train loop while True: # if enough steps, break if step > train_params.train_steps: break # increment the step counter here so it is not forgotten else: step += 1 # compute the grads every step # random eps value for training d_eps = random.random()*train_params.random_eps_range # manual schedule learning rate current_epoch = step // (train_params.epoch_steps) current_lr = train_params.lr_schedule[get_lr(current_epoch, lr_schedule_list)] # benign and adv batch super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True) # create adv samples super_batch_adv_images = sess.run(adv_tensors_concat, feed_dict={x_sb:adv_super_batch[0], keep_prob:1.0, adv_noise: Noise, mu_alpha:[d_eps]}) # get pretrain and train grads pretrain_grads_save_np, _pretrain_loss_value = 
sess.run([pretrain_grads_save, total_pretrain_loss], feed_dict={x_sb: super_batch[0], x_sb_adv: super_batch_adv_images, learning_rate: current_lr, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h}) train_grads_save_np, _train_loss_value = sess.run([train_grads_save, total_loss], feed_dict = {x_sb: super_batch[0], y_sb: super_batch[1], x_sb_adv: super_batch_adv_images, y_sb_adv: adv_super_batch[1], keep_prob: train_params.keep_prob, learning_rate: current_lr, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h}) # accumulate grads for i in range(len(accumu_pretrain_grads)): accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i] for i in range(len(accumu_train_grads)): accumu_train_grads[i] = accumu_train_grads[i] + train_grads_save_np[i] # accumulate loss values total_pretrain_loss_value = total_pretrain_loss_value + _pretrain_loss_value total_train_loss_value = total_train_loss_value + _train_loss_value # use accumulated gradients to update variables if step % train_params.batch_multi == 0 and step > 0: # compute the average grads and build the feed dict pretrain_feed_dict = {} for i in range(len(accumu_pretrain_grads)): pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi pretrain_feed_dict[learning_rate] = current_lr # pretrain_feed_dict[keep_prob] = 0.5 train_feed_dict = {} for i in range(len(accumu_train_grads)): train_feed_dict[train_grads_placeholders[i]] = accumu_train_grads[i] / train_params.batch_multi train_feed_dict[learning_rate] = current_lr # train_feed_dict[keep_prob] = 0.5 # run train ops sess.run(pretrain_op, feed_dict=pretrain_feed_dict) sess.run(train_op, feed_dict=train_feed_dict) # get loss value avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi avg_train_loss_value = total_train_loss_value / train_params.batch_multi # reset the average grads accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes] accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes] total_pretrain_loss_value = 0.0 total_train_loss_value = 0.0 # print status every epoch if step % int(train_params.epoch_steps) == 0: dt = time.time() - start_time avg_epoch_time = dt / (step / train_params.epoch_steps) print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True) # save model if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_save: print('saving model at epoch {}'.format(step / train_params.epoch_steps)) checkpoint_path = os.path.join(os.getcwd() + train_params.check_point_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) # testing during training if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_test: test_start = time.time() print('train test reported at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps)) dt = time.time() - start_time avg_epoch_time = dt / (step / train_params.epoch_steps) print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True) print('pretrain_loss: {:.6f}, train_loss: {:.6f}'.format(avg_pretrain_loss_value, avg_train_loss_value)) # print('output layer: \n\t{}'.format(output_layer_value)) #===================adv samples===================== adv_acc_dict = {} 
robust_adv_acc_dict = {} robust_adv_utility_dict = {} log_str = '' # cover all test data for i in range(train_params.test_epochs): test_batch = TIN_data.test.next_batch(train_params.test_batch_size) # if more GPUs available, generate testing adv samples at once if N_AUX_GPUS > 1: adv_images_dict = sess.run(attack_tensor_testing_dict, feed_dict ={x_sb: test_batch[0], adv_noise: Noise_test, mu_alpha: [train_params.fgsm_eps], keep_prob: 1.0}) else: adv_images_dict = {} # test for each attack for atk in attacks_and_benign: if atk not in adv_acc_dict: adv_acc_dict[atk] = 0.0 robust_adv_acc_dict[atk] = 0.0 robust_adv_utility_dict[atk] = 0.0 if atk == 'benign': testing_img = test_batch[0] elif attack_switch[atk]: # if only one gpu available, generate adv samples in-place if atk not in adv_images_dict: adv_images_dict[atk] = sess.run(attack_tensor_testing_dict[atk], feed_dict ={x_sb:test_batch[0], adv_noise: Noise_test, mu_alpha:[train_params.fgsm_eps], keep_prob: 1.0}) testing_img = adv_images_dict[atk] else: continue ### PixelDP Robustness ### predictions_form_argmax = np.zeros([train_params.test_batch_size, train_params.num_classes]) softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: perturbFM_h, keep_prob: 1.0}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, train_params.num_samples): _BenignLNoise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size) _perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters) _perturbFM_h = np.reshape(_perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters]) for j in range(train_params.test_batch_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: (_BenignLNoise/10 + Noise), FM_h: perturbFM_h, keep_prob: 1.0}) * \ sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: (_perturbFM_h/10 + perturbFM_h), keep_prob: 1.0}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(train_params.test_batch_size): is_correct.append(np.argmax(test_batch[1][j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=train_params.fgsm_eps, dp_epsilon=train_params.dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= train_params.fgsm_eps) adv_acc_dict[atk] += np.sum(is_correct)*1.0/train_params.test_batch_size robust_adv_acc_dict[atk] += np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_adv_utility_dict[atk] += np.sum(is_robust)*1.0/train_params.test_batch_size ############################## # average all acc for whole test data for atk in attacks_and_benign: adv_acc_dict[atk] = adv_acc_dict[atk] / train_params.test_epochs robust_adv_acc_dict[atk] = robust_adv_acc_dict[atk] / train_params.test_epochs robust_adv_utility_dict[atk] = robust_adv_utility_dict[atk] / train_params.test_epochs # added robust prediction log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], 
robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) dt = time.time() - test_start print('testing time: {}'.format(dt)) print(log_str, flush=True) print('*******************')
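The pretrain/train loops above simulate a large effective batch by accumulating per-step gradients in numpy and feeding their average back through placeholders every batch_multi steps. Below is a minimal, self-contained sketch of that accumulation pattern, assuming TF1.x; the toy linear model, the random data, and the batch_multi value are illustrative stand-ins, not the actual model or settings used above.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.Variable(tf.zeros([10, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss, var_list=[w])
grad_tensors = [g for g, _ in grads_and_vars]
# feed the averaged gradients back in through placeholders, as the loops above do
grad_placeholders = [tf.placeholder(tf.float32, g.shape) for g in grad_tensors]
apply_op = opt.apply_gradients(
    [(p, v) for p, (_, v) in zip(grad_placeholders, grads_and_vars)])

batch_multi = 4  # micro-batches per effective batch
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    accum = [np.zeros(p.shape.as_list(), np.float32) for p in grad_placeholders]
    for step in range(1, 13):
        xb = np.random.randn(8, 10).astype(np.float32)
        yb = np.random.randn(8, 1).astype(np.float32)
        gs = sess.run(grad_tensors, feed_dict={x: xb, y: yb})
        accum = [a + g for a, g in zip(accum, gs)]
        if step % batch_multi == 0:
            feed = {p: a / batch_multi for p, a in zip(grad_placeholders, accum)}
            sess.run(apply_op, feed_dict=feed)
            accum = [np.zeros_like(a) for a in accum]  # reset the accumulators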
def train(cifar10_data, logfile): """Train CIFAR-10 for a number of steps.""" logfile.write("fgsm_eps \t %g, epsilon \t %d \n" % (fgsm_eps, target_eps[0])) with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Parameters Declaration #with tf.variable_scope('conv1') as scope: kernel1 = _variable_with_weight_decay( 'kernel1', shape=[3, 3, 3, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0) biases1 = cifar10._variable_on_cpu('biases1', [128], tf.constant_initializer(0.0)) #with tf.variable_scope('conv2') as scope: kernel2 = _variable_with_weight_decay( 'kernel2', shape=[5, 5, 128, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0) biases2 = cifar10._variable_on_cpu('biases2', [128], tf.constant_initializer(0.1)) #with tf.variable_scope('conv3') as scope: kernel3 = _variable_with_weight_decay( 'kernel3', shape=[5, 5, 256, 256], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0) biases3 = cifar10._variable_on_cpu('biases3', [256], tf.constant_initializer(0.1)) #with tf.variable_scope('local4') as scope: kernel4 = cifar10._variable_with_weight_decay( 'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004) biases4 = cifar10._variable_on_cpu('biases4', [hk], tf.constant_initializer(0.1)) #with tf.variable_scope('local5') as scope: kernel5 = cifar10._variable_with_weight_decay( 'kernel5', [hk, 10], stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2), wd=0.0) biases5 = cifar10._variable_on_cpu('biases5', [10], tf.constant_initializer(0.1)) scale2 = tf.Variable(tf.ones([hk])) beta2 = tf.Variable(tf.zeros([hk])) params = [ kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5, scale2, beta2 ] ######## # Build a Graph that computes the logits predictions from the # inference model. shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivityW = tf.reduce_max(sing_vals) dp_delta = 0.05 #dp_mult = attack_norm_bound * math.sqrt(2 * math.log(1.25 / dp_delta)) / dp_epsilon dp_mult = attack_norm_bound * math.sqrt( 2 * math.log(1.25 / dp_delta)) / dp_epsilon noise = tf.placeholder(tf.float32, [None, 14, 14, 128]) sigma = tf.placeholder(tf.float32) x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) #y_conv, h_conv1 = inference(x, params, dp_mult**2 * noise); y_conv, h_conv1 = inference(x, params, attack_norm_bound * noise) softmax_y_conv = tf.nn.softmax(y_conv) y_ = tf.placeholder(tf.float32, [None, 10]) #logits = inference(images) # Calculate loss. 
Apply Taylor Expansion for the output layer loss = cifar10.lossDPSGD(y_conv, y_) # noise redistribution # grad, = tf.gradients(loss, h_conv1) normalized_grad = tf.sign(grad) normalized_grad = tf.stop_gradient(normalized_grad) normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0)))**2 sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False) normalized_grad_r = 14 * 14 * 128 * normalized_grad_r / sum_r print(normalized_grad_r) shape_grad = normalized_grad_r.get_shape().as_list() grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]]) g = tf.transpose(grad_t) sing_g_vals = tf.svd(g, compute_uv=False) sensitivity_2 = tf.reduce_max(sing_g_vals) ######################## opt = tf.train.GradientDescentOptimizer(lr) gw_K1 = tf.gradients(loss, kernel1)[0] gb1 = tf.gradients(loss, biases1)[0] gw_K2 = tf.gradients(loss, kernel2)[0] gb2 = tf.gradients(loss, biases2)[0] gw_K3 = tf.gradients(loss, kernel3)[0] gb3 = tf.gradients(loss, biases3)[0] gw_K4 = tf.gradients(loss, kernel4)[0] gb4 = tf.gradients(loss, biases4)[0] gw_K5 = tf.gradients(loss, kernel5)[0] gb5 = tf.gradients(loss, biases5)[0] #clip gradient gw_K1 = tf.clip_by_norm(gw_K1, clip_bound) gw_K2 = tf.clip_by_norm(gw_K2, clip_bound) gw_K3 = tf.clip_by_norm(gw_K3, clip_bound) gw_K4 = tf.clip_by_norm(gw_K4, clip_bound) gw_K5 = tf.clip_by_norm(gw_K5, clip_bound) #perturb gw_K1 += tf.random_normal(shape=tf.shape(gw_K1), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gw_K2 += tf.random_normal(shape=tf.shape(gw_K2), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gw_K3 += tf.random_normal(shape=tf.shape(gw_K3), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gw_K4 += tf.random_normal(shape=tf.shape(gw_K4), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gw_K5 += tf.random_normal(shape=tf.shape(gw_K5), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gb1 += tf.random_normal(shape=tf.shape(gb1), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gb2 += tf.random_normal(shape=tf.shape(gb2), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gb3 += tf.random_normal(shape=tf.shape(gb3), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gb4 += tf.random_normal(shape=tf.shape(gb4), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size gb5 += tf.random_normal(shape=tf.shape(gb5), mean=0.0, stddev=(sigma * sensitivity), dtype=tf.float32) / batch_size # apply gradients and keep tracking moving average of the parameters apply_gradient_op = opt.apply_gradients([(gw_K1, kernel1), (gb1, biases1), (gw_K2, kernel2), (gb2, biases2), (gw_K3, kernel3), (gb3, biases3), (gw_K4, kernel4), (gb4, biases4), (gw_K5, kernel5), (gb5, biases5)], global_step=global_step) variable_averages = tf.train.ExponentialMovingAverage( MOVING_AVERAGE_DECAY, global_step) variables_averages_op = variable_averages.apply( tf.trainable_variables()) with tf.control_dependencies( [apply_gradient_op, variables_averages_op]): train_op = tf.no_op(name='train') # Build a Graph that trains the model with one batch of examples and # updates the model parameters. 
#train_op = cifar10.trainDPSGD(loss, global_step, clip_bound, sigma, sensitivity) sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) attack_switch = { 'fgsm': True, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, image_size=image_size) # define each attack method's tensor attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 3, nb_iter=3, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # MadryEtAl (Projected Gradient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry #====================== attack ========================= correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Privacy accountant priv_accountant = accountant.GaussianMomentsAccountant(D) privacy_accum_op = priv_accountant.accumulate_privacy_spending( [None, None], sigma, batch_size) # Build the summary operation based on the TF collection of Summaries. #summary_op = tf.summary.merge_all() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess.run(init) # Start the queue runners. 
tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(os.getcwd() + path, sess.graph) # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') T = int(int(math.ceil(D / batch_size)) * epochs + 1) # number of steps step_for_epoch = int(math.ceil(D / batch_size)) #number of steps for one epoch s = math.log(sqrt(2.0 / math.pi) * 1e+5) sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / ( 2.0 * dp_epsilon) #print(sigmaEGM) __noiseE = np.random.normal(0.0, sigmaEGM, 14 * 14 * 128).astype(np.float32) __noiseE = np.reshape(__noiseE, [-1, 14, 14, 128]) print("Compute The Noise Redistribution Vector") for step in xrange(_global_step, 100 * step_for_epoch): batch = cifar10_data.train.next_batch(batch_size) #Get a random batch. _, loss_value = sess.run( [train_op, loss], feed_dict={ x: batch[0], y_: batch[1], noise: __noiseE * 0, sigma: sigma_value * 0 }) if step % (5 * step_for_epoch) == 0: print(loss_value) batch = cifar10_data.train.next_batch(40 * batch_size) grad_redis = sess.run([normalized_grad_r], feed_dict={ x: batch[0], y_: batch[1], noise: __noiseE * 0 }) _sensitivity_2 = sess.run([sensitivity_2], feed_dict={ x: batch[0], y_: batch[1], noise: __noiseE * 0 }) #print(_sensitivity_2) _sensitivityW = sess.run(sensitivityW) #print(_sensitivityW) Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0]) #print(Delta_redis) sigmaHGM = sqrt(2.0) * Delta_redis * ( sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon) #print(sigmaHGM) __noiseH = np.random.normal(0.0, sigmaHGM, 14 * 14 * 128).astype(np.float32) __noiseH = np.reshape(__noiseH, [-1, 14, 14, 128]) * grad_redis sess.run(init) print("Training") for step in xrange(_global_step, _global_step + T): start_time = time.time() batch = cifar10_data.train.next_batch(batch_size) #Get a random batch. 
#grad_redis = sess.run([normalized_grad_r], feed_dict = {x: batch[0], y_: batch[1], noise: (__noise + grad_redis)/2}) _, loss_value = sess.run( [train_op, loss], feed_dict={ x: batch[0], y_: batch[1], noise: (__noiseE + __noiseH) / 2, sigma: sigma_value }) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' sess.run([privacy_accum_op]) spent_eps_deltas = priv_accountant.get_privacy_spent( sess, target_eps=target_eps) if step % (5 * step_for_epoch) == 0: print(loss_value) print(spent_eps_deltas) _break = False for _eps, _delta in spent_eps_deltas: if _delta >= delta: _break = True break if _break: break ## Robustness print("Testing") adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} test_batch_size = 5000 for atk in attack_switch.keys(): if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: test_batch = cifar10_data.test.next_batch(test_batch_size) adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={x: test_batch[0]}) ### PixelDP Robustness ### predictions_form_argmax = np.zeros([test_batch_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (__noiseE + __noiseH) / 2 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1000): _noiseE = np.random.normal(0.0, sigmaEGM, 14 * 14 * 128).astype(np.float32) _noiseE = np.reshape(_noiseE, [-1, 14, 14, 128]) _noise = np.random.normal(0.0, sigmaHGM, 14 * 14 * 128).astype(np.float32) _noise = np.reshape(_noise, [-1, 14, 14, 128]) * grad_redis for j in range(test_batch_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_batch_size): is_correct.append( np.argmax(test_batch[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustnessGGaussian.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='gaussian') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_batch_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum( is_robust) * 1.0 / test_batch_size ############################## log_str = "" for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n')
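For reference, the core DP-SGD step inside train() comes down to: clip each parameter gradient to clip_bound, then perturb it with Gaussian noise of scale sigma * sensitivity divided by the batch size, before applying it. A minimal sketch of that single update, assuming TF1.x; the toy model and the constants are illustrative, not the values used above.

import numpy as np
import tensorflow as tf

clip_bound, sigma, sensitivity, batch_size = 1.0, 4.0, 1.0, 128

x = tf.placeholder(tf.float32, [None, 10])
y_ = tf.placeholder(tf.float32, [None, 1])
w = tf.Variable(tf.zeros([10, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y_))

grad = tf.gradients(loss, w)[0]
grad = tf.clip_by_norm(grad, clip_bound)  # bound the per-batch sensitivity
grad += tf.random_normal(tf.shape(grad), mean=0.0,
                         stddev=sigma * sensitivity) / batch_size  # Gaussian mechanism
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients([(grad, w)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={x: np.zeros((batch_size, 10), np.float32),
                                  y_: np.zeros((batch_size, 1), np.float32)})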
def SSGD_resnet_testing(TIN_data, resnet_params, train_params, test_params, all_params): # dict for encoding layer variables and output layer variables pre_define_vars = {} # list of variables to train train_vars = [] with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = tf.Variable(0, trainable=False) # Parameters Declaration ###################################### # encoding (pretrain) layer variables with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope: kernel1 = tf.get_variable( 'kernel1', shape=[ train_params.enc_kernel_size, train_params.enc_kernel_size, 3, train_params.enc_filters ], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer_conv2d()) biases1 = tf.get_variable('biases1', shape=[train_params.enc_filters], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['kernel1'] = kernel1 pre_define_vars['biases1'] = biases1 train_vars.append(kernel1) train_vars.append(biases1) dp_mult = all_params['dp_mult'] # output layer variables with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope: stdv = 1.0 / math.sqrt(train_params.hk) final_w = tf.get_variable( 'kernel', shape=[train_params.hk, train_params.num_classes], dtype=tf.float32, initializer=tf.random_uniform_initializer(-stdv, stdv)) final_b = tf.get_variable('bias', shape=[train_params.num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) pre_define_vars['final_w'] = final_w pre_define_vars['final_b'] = final_b train_vars.append(final_w) train_vars.append(final_b) ###################################### # Build a Graph that computes the logits predictions from the inputs ###################################### # input placeholders x_sb = tf.placeholder( tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='x_sb') # input is the bunch of n_batches x_test = tf.placeholder( tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='x_test') y_sb = tf.placeholder( tf.float32, [None, train_params.num_classes], name='y_sb') # input is the bunch of n_batches (super batch) y_test = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_test') noise = tf.placeholder(tf.float32, [ None, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters ], name='noise') # one time keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob') with tf.device('/gpu:0'): # the model for testing y_logits_test, _ = test_inference( x_sb, train_params.attack_norm_bound * noise, keep_prob, pre_define_vars, resnet_params, train_params) y_softmax_test = tf.nn.softmax(y_logits_test) correct_prediction = tf.equal(tf.argmax(y_logits_test, 1), tf.argmax(y_sb, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # print all variables print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') all_vars = tf.global_variables() print_var_list('all vars', all_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') # add selected vars into list # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or for var in tf.global_variables(): if 'resnet_model' in var.name and \ ('conv0' in var.name or 'fc' in var.name or 'res3' in var.name or 'res4' in var.name or 'res1' in var.name or 'res2' in var.name) and \ ('gamma' in var.name or 'beta' in var.name or 'kernel' in var.name or 'bias' in var.name): if var not in train_vars: train_vars.append(var) elif 'enc_layer' in var.name and \ ('kernel' in var.name or 'bias' in var.name or 'gamma' in var.name or 'beta' in var.name): if var not in 
train_vars: train_vars.append(var) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') print_var_list('train_vars', train_vars) print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') ###################################### # Create a saver. saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000) # start a session with memory growth config = tf.ConfigProto(log_device_placement=False) config.gpu_options.allow_growth = True sess = tf.Session(config=config) print("session created") # list all checkpoints in ckpt_path checkpoint_path_read = os.path.join(os.getcwd() + test_params.check_point_dir) ckpts = tf.train.get_checkpoint_state(checkpoint_path_read) print(ckpts) # find the ckpt we need to load and load it for ckpt in ckpts.all_model_checkpoint_paths: # print(ckpt) ckpt_step = int(ckpt.split('-')[-1]) if ckpt_step == test_params.step_to_load: saver.restore(sess, ckpt) print('model loaded from {}'.format(ckpt)) # ####################################### # # setup all attacks attack_switch = { 'fgsm': False, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_output_probs, output_layer='probs', keep_prob=keep_prob, pre_define_vars=pre_define_vars, resnet_params=resnet_params, train_params=train_params) attack_tensor_testing_dict = {} # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]) # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) with tf.device('/gpu:0'): if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate( x=x_sb, eps=mu_alpha, eps_iter=mu_alpha / train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # MomentumIterativeMethod with tf.device('/gpu:0'): if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_testing_dict['mim'] = mim_obj.generate( x=x_sb, eps=mu_alpha, eps_iter=mu_alpha / train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) # MadryEtAl (Projected Gradient with random init, same as rand+fgsm) with tf.device('/gpu:0'): if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) attack_tensor_testing_dict['madry'] = madry_obj.generate( x=x_sb, eps=mu_alpha, eps_iter=mu_alpha / train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0) # ####################################### sigmaEGM = all_params['sigmaEGM'] __noiseE = all_params['__noiseE'] grad_redis = all_params['grad_redis'] _sensitivity_2 = all_params['_sensitivity_2'] _sensitivityW = all_params['_sensitivityW'] Delta_redis = all_params['Delta_redis'] sigmaHGM = all_params['sigmaHGM'] __noiseH = all_params['__noiseH'] __noise_zero = all_params['__noise_zero'] #################################### #################################### print('start testing') start_time = time.time() log_file_path = os.getcwd() + test_params.log_file_path log_file = open(log_file_path, 'a', encoding='utf-8') attacks_and_benign = test_params.attacks + ['benign'] #===================adv samples===================== # for each eps setting for fgsm_eps in test_params.fgsm_eps_list: 
adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} log_str = '' eps_start_time = time.time() # cover all test data for i in range(test_params.test_epochs): test_batch = TIN_data.test.next_batch( test_params.test_batch_size) adv_images_dict = {} # test for each attack for atk in attacks_and_benign: start_time = time.time() if atk not in adv_acc_dict: adv_acc_dict[atk] = 0.0 robust_adv_acc_dict[atk] = 0.0 robust_adv_utility_dict[atk] = 0.0 if atk == 'benign': testing_img = test_batch[0] elif attack_switch[atk]: # if only one gpu available, generate adv samples in-place if atk not in adv_images_dict: adv_images_dict[atk] = sess.run( attack_tensor_testing_dict[atk], feed_dict={ x_sb: test_batch[0], mu_alpha: [fgsm_eps], keep_prob: 1.0 }) testing_img = adv_images_dict[atk] else: continue print('adv gen time: {}s'.format(time.time() - start_time)) start_time = time.time() ### PixelDP Robustness ### predictions_form_argmax = np.zeros([ test_params.test_batch_size, train_params.num_classes ]) softmax_predictions = sess.run( y_softmax_test, feed_dict={ x_sb: testing_img, noise: (__noiseE + __noiseH) / 2, keep_prob: 1.0 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(1, test_params.num_samples + 1): if n_draws % 100 == 0: print( 'current draws: {}, avg draw time: {}s'.format( n_draws, (time.time() - start_time) / n_draws)) _noiseE = np.random.normal( 0.0, sigmaEGM**2, train_params.enc_h_size * train_params.enc_h_size * train_params.enc_filters).astype(np.float32) _noiseE = np.reshape(_noiseE, [ -1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters ]) _noise = np.random.normal( 0.0, sigmaHGM**2, train_params.enc_h_size * train_params.enc_h_size * train_params.enc_filters).astype(np.float32) _noise = np.reshape(_noise, [ -1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters ]) * grad_redis for j in range(test_params.test_batch_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( y_softmax_test, feed_dict={ x_sb: testing_img, noise: (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4, keep_prob: 1.0 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_params.test_batch_size): is_correct.append( np.argmax(test_batch[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustnessGGaussian.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=train_params.dp_epsilon, dp_delta=0.05, dp_mechanism='gaussian') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] += np.sum( is_correct) * 1.0 / test_params.test_batch_size robust_adv_acc_dict[atk] += np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] += np.sum( is_robust) * 1.0 / test_params.test_batch_size dt = time.time() - start_time print('atk test time: {}s'.format(dt), flush=True) ############################## # average all acc for whole test data log_str += datetime.now().strftime("%Y-%m-%d_%H:%M:%S\n") log_str += 'model trained epoch: {}\n'.format( test_params.epoch_to_test) log_str += 'fgsm_eps: {}\n'.format(fgsm_eps) log_str += 'iter_step_testing: {}\n'.format( test_params.iter_step_testing) log_str += 'num_samples: {}\n'.format(test_params.num_samples) for atk in attacks_and_benign: adv_acc_dict[atk] = adv_acc_dict[atk] / 
test_params.test_epochs robust_adv_acc_dict[ atk] = robust_adv_acc_dict[atk] / test_params.test_epochs robust_adv_utility_dict[atk] = robust_adv_utility_dict[ atk] / test_params.test_epochs # added robust prediction log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) dt = time.time() - eps_start_time print('total test time: {}s'.format(dt), flush=True) print(log_str, flush=True) print('*******************') log_file.write(log_str) log_file.write('*******************\n') log_file.flush() dt = time.time() - start_time log_file.close()
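The testing loops above all implement the same PixelDP certification procedure: sample the noisy classifier many times, tally the argmax votes per example, and certify an example as robust when the certified size derived from its vote counts is at least the attack radius. A condensed numpy sketch of that loop follows; run_noisy is a hypothetical stand-in for one sess.run of the noisy softmax under freshly drawn noise, and robustness_size_argmax stands for the helper already used throughout this code.

import numpy as np

def certify(run_noisy, labels, num_samples, num_classes, fgsm_eps,
            dp_epsilon, dp_mult, robustness_size_argmax):
    n = labels.shape[0]
    counts = np.zeros([n, num_classes])
    for _ in range(num_samples):
        preds = run_noisy()  # one forward pass under freshly drawn noise
        counts[np.arange(n), np.argmax(preds, axis=1)] += 1
    is_correct, is_robust = [], []
    for j in range(n):
        is_correct.append(np.argmax(labels[j]) == np.argmax(counts[j]))
        size = robustness_size_argmax(counts=counts[j], eta=0.05,
                                      dp_attack_size=fgsm_eps,
                                      dp_epsilon=dp_epsilon, dp_delta=0.05,
                                      dp_mechanism='gaussian') / dp_mult
        is_robust.append(size >= fgsm_eps)
    acc = np.mean(is_correct)
    robust_acc = (np.sum([c and r for c, r in zip(is_correct, is_robust)])
                  / max(np.sum(is_robust), 1))  # guard against zero certified
    return acc, robust_acc, np.mean(is_robust)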
class TestMadryEtAl(CleverHansTest): def setUp(self): super(TestMadryEtAl, self).setUp() self.sess = tf.Session() self.model = SimpleModel() self.attack = MadryEtAl(self.model, sess=self.sess) def test_attack_strength(self): """ If clipping is not done at each iteration (not using clip_min and clip_max), this attack fails by np.mean(orig_labels == new_labels) == .5 """ x_val = np.random.rand(100, 2) x_val = np.array(x_val, dtype=np.float32) x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.05, clip_min=0.5, clip_max=0.7, nb_iter=5, sanity_checks=False) orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1) new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1) self.assertLess(np.mean(orig_labs == new_labs), 0.1) def test_clip_eta(self): x_val = np.random.rand(100, 2) x_val = np.array(x_val, dtype=np.float32) x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.1, nb_iter=5) delta = np.max(np.abs(x_adv - x_val), axis=1) self.assertTrue(np.all(delta <= 1.)) def test_generate_np_gives_clipped_adversarial_examples(self): x_val = np.random.rand(100, 2) x_val = np.array(x_val, dtype=np.float32) x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.1, nb_iter=5, clip_min=-0.2, clip_max=0.3, sanity_checks=False) self.assertLess(-0.201, np.min(x_adv)) self.assertLess(np.max(x_adv), .301) def test_multiple_initial_random_step(self): """ This test generates multiple adversarial examples until an adversarial example is generated with a different label compared to the original label. This is the procedure suggested in Madry et al. (2017). This test will fail if an initial random step is not taken (error>0.5). """ x_val = np.random.rand(100, 2) x_val = np.array(x_val, dtype=np.float32) orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1) new_labs_multi = orig_labs.copy() # Generate multiple adversarial examples for i in range(10): x_adv = self.attack.generate_np(x_val, eps=.5, eps_iter=0.05, clip_min=0.5, clip_max=0.7, nb_iter=2, sanity_checks=False) new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1) # Examples for which we have not found adversarial examples I = (orig_labs == new_labs_multi) new_labs_multi[I] = new_labs[I] self.assertLess(np.mean(orig_labs == new_labs_multi), 0.5)
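The clipping behavior these tests exercise reduces to two projections applied each PGD iteration: the perturbation is kept inside the L-inf ball of radius eps, and the perturbed input inside [clip_min, clip_max]. A small numpy sketch of that projection, written independently of the cleverhans internals:

import numpy as np

def project_linf(x, x_adv, eps, clip_min=None, clip_max=None):
    eta = np.clip(x_adv - x, -eps, eps)  # L-inf ball projection (cf. test_clip_eta)
    x_proj = x + eta
    if clip_min is not None or clip_max is not None:
        # valid-range clipping (cf. test_generate_np_gives_clipped_adversarial_examples)
        x_proj = np.clip(x_proj, clip_min, clip_max)
    return x_proj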
def craft_one_type(sess, model, X, Y, dataset, attack, batch_size, log_path=None, fp_path=None, model_logits=None): """ TODO :param sess: :param model: :param X: :param Y: :param dataset: :param attack: :param batch_size: :return: """ print("entered") if not log_path is None: PATH_DATA = log_path if attack == 'fgsm': # FGSM attack print('Crafting fgsm adversarial samples...') X_adv = fast_gradient_sign_method(sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'], clip_min=CLIP_MIN, clip_max=CLIP_MAX, batch_size=batch_size) elif attack == 'adapt-fgsm': # Adaptive FGSM attack print('Crafting fgsm adversarial samples...') X_adv = adaptive_fast_gradient_sign_method( sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'], clip_min=CLIP_MIN, clip_max=CLIP_MAX, batch_size=batch_size, log_dir=fp_path, model_logits=model_logits, dataset=dataset) elif attack == 'adapt-bim-b': # BIM attack print('Crafting %s adversarial samples...' % attack) X_adv = adaptive_basic_iterative_method( sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'], eps_iter=ATTACK_PARAMS[dataset]['eps_iter'], clip_min=CLIP_MIN, clip_max=CLIP_MAX, batch_size=batch_size, log_dir=fp_path, model_logits=model_logits, dataset=dataset) elif attack in ['bim-a', 'bim-b']: # BIM attack print('Crafting %s adversarial samples...' % attack) its, results = basic_iterative_method( sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'], eps_iter=ATTACK_PARAMS[dataset]['eps_iter'], clip_min=CLIP_MIN, clip_max=CLIP_MAX, batch_size=batch_size) if attack == 'bim-a': # BIM-A # For each sample, select the time step where that sample first # became misclassified X_adv = np.asarray([results[its[i], i] for i in range(len(Y))]) else: # BIM-B # For each sample, select the very last time step X_adv = results[-1] elif attack == 'jsma': # JSMA attack print('Crafting jsma adversarial samples. This may take > 5 hours') X_adv = saliency_map_method(sess, model, X, Y, theta=1, gamma=0.1, clip_min=CLIP_MIN, clip_max=CLIP_MAX) elif attack == 'cw-l2': # C&W attack print( 'Crafting %s examples. This takes > 5 hours due to internal grid search' % attack) image_size = ATTACK_PARAMS[dataset]['image_size'] num_channels = ATTACK_PARAMS[dataset]['num_channels'] num_labels = ATTACK_PARAMS[dataset]['num_labels'] cw_attack = CarliniL2(sess, model, image_size, num_channels, num_labels, batch_size=batch_size) X_adv = cw_attack.attack(X, Y) elif attack == 'cw-fp': # C&W attack to break LID detector print( 'Crafting %s examples. 
This takes > 5 hours due to internal grid search' % attack) image_size = ATTACK_PARAMS[dataset]['image_size'] num_channels = ATTACK_PARAMS[dataset]['num_channels'] num_labels = ATTACK_PARAMS[dataset]['num_labels'] cw_attack = CarliniFP_2vars(sess, model, image_size, num_channels, num_labels, batch_size=batch_size, fp_dir=fp_path) X_adv = cw_attack.attack(X, Y) elif attack == 'spsa': binary_steps = 1 batch_shape = X.shape X_input = tf.placeholder(tf.float32, shape=(1, ) + batch_shape[1:]) Y_label = tf.placeholder(tf.int32, shape=(1, )) alpha = tf.placeholder(tf.float32, shape=(1, )) num_samples = np.shape(X)[0] # X = (X - np.argmin(X))/(np.argmax(X)-np.argmin(X)) _min = np.min(X) _max = np.max(X) print(_max, _min) print(tf.trainable_variables()) filters = sess.run('conv1/kernel:0') biases = 0.0 * sess.run('conv1/bias:0') shift_model = Sequential() if (dataset == 'mnist'): shift_model.add( Conv2D(32, kernel_size=(3, 3), activation=None, input_shape=(1, 28, 28))) else: shift_model.add( Conv2D(32, kernel_size=(3, 3), activation=None, input_shape=(3, 32, 32))) X_input_2 = tf.placeholder(tf.float32, shape=(None, ) + batch_shape[1:]) correction_term = shift_model(X_input_2) if (dataset == 'mnist'): X_correction = -0.5 * np.ones( (1, 1, 28, 28) ) # We will shift the image up by 0.5, so this is the correction else: X_correction = -0.5 * np.ones( (1, 3, 32, 32) ) # We will shift the image up by 0.5, so this is the correction # for PGD shift_model.layers[0].set_weights([filters, biases]) bias_correction_terms = (sess.run(correction_term, feed_dict={X_input_2: X_correction})) for i in range(32): biases[i] = bias_correction_terms[0, i, 0, 0] _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) original_biases = model.layers[0].get_weights()[1] original_weights = model.layers[0].get_weights()[0] model.layers[0].set_weights( [original_weights, original_biases + biases]) #Correct model for input shift X = X + 0.5 #shift input to make it >=0 _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) # check accuracy post correction of input and model print('Crafting %s examples. 
Using Cleverhans' % attack) image_size = ATTACK_PARAMS[dataset]['image_size'] num_channels = ATTACK_PARAMS[dataset]['num_channels'] num_labels = ATTACK_PARAMS[dataset]['num_labels'] from cleverhans.utils_keras import KerasModelWrapper wrapped_model = KerasModelWrapper(model) if dataset == "mnist": wrapped_model.nb_classes = 10 elif dataset == "cifar": wrapped_model.nb_classes = 10 else: wrapped_model.nb_classes = 10 real_batch_size = X.shape[0] X_adv = None spsa = SPSA(wrapped_model, back='tf', sess=sess) spsa_params = { "epsilon": ATTACK_PARAMS[dataset]['eps'], 'num_steps': 100, 'spsa_iters': 1, 'early_stop_loss_threshold': None, 'is_targeted': False, 'is_debug': False } X_adv_spsa = spsa.generate(X_input, alpha=alpha, y=Y_label, fp_path=fp_path, **spsa_params) for i in range(num_samples): # rescale to format TF wants #X_i_norm = (X[i] - _min)/(_max-_min) X_i_norm = X[i] # Run attack best_res = None ALPHA = np.ones(1) * 0.1 lb = 1.0e-2 ub = 1.0e2 for j in range(binary_steps): res = sess.run(X_adv_spsa, feed_dict={ X_input: np.expand_dims(X_i_norm, axis=0), Y_label: np.array([np.argmax(Y[i])]), alpha: ALPHA }) if (dataset == 'mnist'): X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28]) else: X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32]) pred = model(X_place) model_op = sess.run(pred, feed_dict={X_place: res}) if (not np.argmax(model_op) == np.argmax(Y[i, :])): lb = ALPHA[0] else: ub = ALPHA[0] ALPHA[0] = 0.5 * (lb + ub) print(ALPHA) if (best_res is None): best_res = res else: if (not np.argmax(model_op) == np.argmax(Y[i, :])): best_res = res pass # Rescale result back to our scale if (i == 0): X_adv = best_res else: X_adv = np.concatenate((X_adv, best_res), axis=0) _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the adversarial test set: %0.2f%%" % (100.0 * acc)) _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) #Revert model to original model.layers[0].set_weights([original_weights, original_biases]) #Revert adv shift X_adv = X_adv - 0.5 X = X - 0.5 #Not used but just for logging purposes elif attack == 'adapt-pgd': binary_steps = 1 rand_starts = 2 batch_shape = X.shape X_input = tf.placeholder(tf.float32, shape=(1, ) + batch_shape[1:]) Y_label = tf.placeholder(tf.int32, shape=(1, )) alpha = tf.placeholder(tf.float32, shape=(1, )) num_samples = np.shape(X)[0] # X = (X - np.argmin(X))/(np.argmax(X)-np.argmin(X)) _min = np.min(X) _max = np.max(X) print(_max, _min) print(tf.trainable_variables()) filters = sess.run('conv1/kernel:0') biases = 0.0 * sess.run('conv1/bias:0') shift_model = Sequential() if (dataset == 'mnist'): shift_model.add( Conv2D(32, kernel_size=(3, 3), activation=None, input_shape=(1, 28, 28))) else: shift_model.add( Conv2D(32, kernel_size=(3, 3), activation=None, input_shape=(3, 32, 32))) X_input_2 = tf.placeholder(tf.float32, shape=(None, ) + batch_shape[1:]) correction_term = shift_model(X_input_2) if (dataset == 'mnist'): X_correction = -0.5 * np.ones( (1, 1, 28, 28) ) # We will shift the image up by 0.5, so this is the correction else: X_correction = -0.5 * np.ones( (1, 3, 32, 32) ) # We will shift the image up by 0.5, so this is the correction # for PGD shift_model.layers[0].set_weights([filters, biases]) bias_correction_terms = (sess.run(correction_term, feed_dict={X_input_2: X_correction})) for i in range(32): biases[i] = bias_correction_terms[0, i, 0, 0] _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) 
print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) original_biases = model.layers[0].get_weights()[1] original_weights = model.layers[0].get_weights()[0] model.layers[0].set_weights( [original_weights, original_biases + biases]) #Correct model for input shift X = X + 0.5 #shift input to make it >=0 _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) # check accuracy post correction of input and model print('Crafting %s examples. Using Cleverhans' % attack) image_size = ATTACK_PARAMS[dataset]['image_size'] num_channels = ATTACK_PARAMS[dataset]['num_channels'] num_labels = ATTACK_PARAMS[dataset]['num_labels'] from cleverhans.utils_keras import KerasModelWrapper wrapped_model = KerasModelWrapper(model) if dataset == "mnist": wrapped_model.nb_classes = 10 elif dataset == "cifar": wrapped_model.nb_classes = 10 else: wrapped_model.nb_classes = 10 real_batch_size = X.shape[0] X_adv = None pgd = MadryEtAl(wrapped_model, back='tf', sess=sess) X_adv_pgd, adv_loss_fp = pgd.generate(X_input, eps=0.3, eps_iter=0.02, clip_min=0.0, clip_max=1.0, nb_iter=20, rand_init=True, fp_path=fp_path, alpha=alpha) for i in range(num_samples): # rescale to format TF wants #X_i_norm = (X[i] - _min)/(_max-_min) X_i_norm = X[i] # Run attack best_res = None best_res_loss = 1000000.0 ALPHA = np.ones(1) * 0.1 lb = 1.0e-2 ub = 1.0e2 for j in range(binary_steps): bin_flag = 0 for jj in range(rand_starts): [res, res_loss] = sess.run( [X_adv_pgd, adv_loss_fp], feed_dict={ X_input: np.expand_dims(X[i], axis=0), Y_label: np.array([np.argmax(Y[i])]), alpha: ALPHA }) if (dataset == 'mnist'): X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28]) else: X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32]) pred = model(X_place) model_op = sess.run(pred, feed_dict={X_place: res}) if (best_res is None): best_res = res else: if ((not np.argmax(model_op) == np.argmax(Y[i, :])) and res_loss < best_res_loss): best_res = res best_res_loss = res_loss bin_flag = 1 pass if (bin_flag == 1): lb = ALPHA[0] else: ub = ALPHA[0] ALPHA[0] = 0.5 * (lb + ub) print(ALPHA) # Rescale result back to our scale if (i == 0): X_adv = best_res else: X_adv = np.concatenate((X_adv, best_res), axis=0) _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the adversarial test set: %0.2f%%" % (100.0 * acc)) _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) #Revert model to original model.layers[0].set_weights([original_weights, original_biases]) #Revert adv shift X_adv = X_adv - 0.5 X = X - 0.5 #Not used but just for logging purposes pass if ("adapt" in attack or "fp" in attack or "spsa" in attack): [m, _, _, _] = (np.shape(X_adv)) cropped_X_adv = [] cropped_Y = [] cropped_X = [] if (dataset == 'mnist'): X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28]) pred = model(X_place) else: X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32]) pred = model(X_place) for i in range(m): logits_op = sess.run(pred, feed_dict={X_place: X_adv[i:i + 1, :, :, :]}) if (not np.argmax(logits_op) == np.argmax(Y[i, :])): cropped_Y.append(Y[i, :]) cropped_X_adv.append(X_adv[i, :, :, :]) cropped_X.append(X[i, :, :, :]) X_adv = np.array(cropped_X_adv) X = np.array(cropped_X) Y = np.array(cropped_Y) f = open( os.path.join(log_path, 'Random_Test_%s_%s.p' % (dataset, attack)), 'w') pickle.dump({"adv_input": X, "adv_labels": Y}, f) f.close() 
#np.save(os.path.join(PATH_DATA, 'Adv_%s_%s.npy' % (dataset, attack)), X_adv) f = open(os.path.join(log_path, 'Adv_%s_%s.p' % (dataset, attack)), 'w') pickle.dump({"adv_input": X_adv, "adv_labels": Y}, f) f.close() _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0) print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc)) l2_diff = np.linalg.norm(X_adv.reshape((len(X), -1)) - X.reshape( (len(X), -1)), axis=1).mean() print("Average L-2 perturbation size of the %s attack: %0.2f" % (attack, l2_diff)) if (("adapt" in attack) or ("cw-fp" in attack)): return (X, X_adv, Y) else: print(Y.shape) return (X_adv, Y)
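The spsa and adapt-pgd branches above both wrap the attack in a bisection over the weight ALPHA: when the attack at the current alpha yields a misclassified example the lower bound moves up, otherwise the upper bound moves down. A stripped-down sketch of that search; run_attack and fools_model are hypothetical stand-ins for the sess.run attack call and the model-prediction check.

def search_alpha(run_attack, fools_model, binary_steps=10, lb=1e-2, ub=1e2):
    alpha = 0.1
    best = None
    for _ in range(binary_steps):
        x_adv = run_attack(alpha)
        if fools_model(x_adv):
            lb, best = alpha, x_adv  # attack succeeded: try a larger alpha
        else:
            ub = alpha               # attack failed: back off
        alpha = 0.5 * (lb + ub)      # bisect for the next round
    return best, alpha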
def main(argv): model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) if model_file is None: print('No model found') sys.exit() cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir) nb_classes = 10 X_test = cifar.train_data.xs Y_test = to_categorical(cifar.train_data.ys, nb_classes) assert Y_test.shape[1] == 10. print("train data shape", X_test.shape) set_log_level(logging.DEBUG) with tf.Session() as sess: x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3)) y = tf.placeholder(tf.float32, shape=(None, 10)) from cleverhans.model_zoo.madry_lab_challenges.cifar10_model import make_wresnet model = make_wresnet() saver = tf.train.Saver() # Restore the checkpoint saver.restore(sess, model_file) SCOPE = "cifar10_challenge" model2 = make_wresnet(scope=SCOPE) assert len(model.get_vars()) == len(model2.get_vars()) found = [False] * len(model2.get_vars()) for var1 in model.get_vars(): var1_found = False var2_name = SCOPE + "/" + var1.name for idx, var2 in enumerate(model2.get_vars()): if var2.name == var2_name: var1_found = True found[idx] = True sess.run(tf.assign(var2, var1)) break assert var1_found, var1.name assert all(found) model = model2 saver = tf.train.Saver() # Restore the checkpoint #saver.restore(sess, model_file) nb_samples = FLAGS.nb_samples attack_params = { 'batch_size': FLAGS.batch_size, 'clip_min': 0., 'clip_max': 255. } if FLAGS.attack_type == 'cwl2': from cleverhans.attacks import CarliniWagnerL2 attacker = CarliniWagnerL2(model, sess=sess) attack_params.update({ 'binary_search_steps': 1, 'confidence': 0, 'max_iterations': 100, 'learning_rate': 0.1, 'initial_const': 10, 'batch_size': 10 }) else: # eps and eps_iter in range 0-255 attack_params.update({'eps': 16, 'ord': np.inf}) if FLAGS.attack_type == 'fgsm': from cleverhans.attacks import FastGradientMethod attacker = FastGradientMethod(model, sess=sess) elif FLAGS.attack_type == 'pgd': attack_params.update({ 'eps': 8, 'eps_iter': .02, 'ord': np.inf, 'nb_iter': 10 }) from cleverhans.attacks import MadryEtAl attacker = MadryEtAl(model, sess=sess) elif FLAGS.attack_type == 'deepFool': print("here") attack_params.update({ 'ord': np.inf, 'eps': 6.0, 'max_iter': 100 }) from CdeepFool_cleverhans import DeepFool attacker = DeepFool(model, sess=sess) eval_par = {'batch_size': FLAGS.batch_size} if FLAGS.sweep: max_eps = 16 epsilons = np.linspace(1, max_eps, max_eps) for e in epsilons: t1 = time.time() attack_params.update({'eps': e}) x_adv = attacker.generate(x, **attack_params) preds_adv = model.get_probs(x_adv) x1 = sess.run(x_adv, feed_dict={x: X_test[0], y: Y_test[0]}) print(x1.shape) l_inf = np.amax(np.abs(X_test[0] - x1)) print('perturbation found: {}'.format(l_inf)) acc = model_eval(sess, x, y, preds_adv, X_test[:nb_samples], Y_test[:nb_samples], args=eval_par) print('Epsilon %.2f, accuracy on adversarial' % e, 'examples %0.4f\n' % acc) t2 = time.time() else: t1 = time.time() x_adv = attacker.generate(x, **attack_params) preds_adv = model.get_probs(x_adv) logits = model.get_logits(x) #print (len(x_adv)) num_eval_examples = 1000 eval_batch_size = 100 num_batches = int(math.ceil(num_eval_examples / eval_batch_size)) x_adv_all = [] # adv accumulator y_adv_all = [] y_true = [] print('Iterating over {} batches'.format(num_batches)) for ibatch in range(num_batches): bstart = ibatch * eval_batch_size bend = min(bstart + eval_batch_size, num_eval_examples) print('batch size: {}'.format(bend - bstart)) x_batch = X_test[bstart:bend, :] y_batch = Y_test[bstart:bend] y_clean = np.argmax(sess.run(logits, feed_dict={x: x_batch}), 
axis=1) x_b_adv, pred = sess.run([x_adv, preds_adv], feed_dict={ x: x_batch, y: y_batch }) y_b_adv = np.argmax(sess.run(logits, feed_dict={x: x_b_adv}), axis=1) count = 0 y_batch = np.argmax(y_batch, axis=1) for i in range(eval_batch_size): if (y_b_adv[i] != y_batch[i] and y_clean[i] == y_batch[i]): l_inf = np.amax(np.abs(x_batch[i] - x_b_adv[i])) print('perturbation found: {}'.format(l_inf)) #print (y_b_adv[i]) x_adv_all.append(x_b_adv[i]) y_adv_all.append(y_b_adv[i]) y_true.append(y_batch[i]) count += 1 #print (y_adv_all[0:20]) #print (y_true[0:20]) print("Total adversarial examples found in this batch", count) #x_adv_all.extend(x_b_adv) #y_adv_all.extend(y_b_adv) x_adv_all = np.array(x_adv_all) y_true = np.array(y_true) y_adv_all = np.array(y_adv_all) print('Adv Label', y_adv_all[0:20]) print('Ori Label', y_true[0:20]) #y_adv = np.squeeze(y_adv) print(x_adv_all.shape) print(y_adv_all.shape) print(y_true.shape) count = 0 for i in range(y_adv_all.shape[0]): if y_true[i] != y_adv_all[i]: count += 1 print("Total adversarial examples found", count) pickle.dump( (x_adv_all, y_true, y_adv_all), open( '/scratch/kiani/Projects/CIFAR data/Adversarial/deepFool/iter_100/deepFool_E6_train.p', 'wb')) #from numpy import linalg as LA #l_2 = LA.norm(X_test[0] - x1[0]) #l_inf = np.amax(np.abs(x - x_adv)) t2 = time.time() print("Range of data should be 0-255 and actual is: ", str(np.min(x_adv_all)) + " " + str(np.max(x_adv_all))) image = ((x_adv_all[2])).astype(np.uint8) img = Image.fromarray(image) img.save("deepFool_attack.jpeg") print("Took", t2 - t1, "seconds")
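The batch loop in main() keeps only the examples where the clean prediction is correct but the adversarial one is wrong, and reports the L-inf size of the kept perturbations. The same filter as a compact, vectorized numpy function, for reference; the array names are illustrative.

import numpy as np

def filter_successful(x, x_adv, y_true, y_clean, y_adv):
    # keep examples misclassified adversarially but classified correctly clean
    keep = (y_adv != y_true) & (y_clean == y_true)
    l_inf = np.abs(x[keep] - x_adv[keep]).max() if keep.any() else 0.0
    return x_adv[keep], y_true[keep], y_adv[keep], l_inf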
def main(argv=None): model_path = FLAGS.model_path targeted = True if FLAGS.targeted else False scale = True if FLAGS.scale else False learning_rate = FLAGS.learning_rate nb_filters = FLAGS.nb_filters batch_size = FLAGS.batch_size nb_epochs = FLAGS.nb_epochs delay = FLAGS.delay eps = FLAGS.eps adv = FLAGS.adv attack = FLAGS.attack attack_iterations = FLAGS.attack_iterations nb_iter = FLAGS.nb_iter #### EMPIR extra flags lowprecision=FLAGS.lowprecision abits=FLAGS.abits wbits=FLAGS.wbits abitsList=FLAGS.abitsList wbitsList=FLAGS.wbitsList stocRound=True if FLAGS.stocRound else False rand=FLAGS.rand model_path2 = FLAGS.model_path2 model_path1 = FLAGS.model_path1 model_path3 = FLAGS.model_path3 ensembleThree=True if FLAGS.ensembleThree else False abits2=FLAGS.abits2 wbits2=FLAGS.wbits2 abits2List=FLAGS.abits2List wbits2List=FLAGS.wbits2List #### save = False train_from_scratch = False #### Imagenet flags imagenet_path = FLAGS.imagenet_path if imagenet_path is None: print("Error: Imagenet data path not specified") sys.exit(1) # Imagenet specific dimensions img_rows = _DEFAULT_IMAGE_SIZE img_cols = _DEFAULT_IMAGE_SIZE channels = _NUM_CHANNELS nb_classes = _NUM_CLASSES # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") # Image dimensions ordering should follow the Theano convention if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) set_log_level(logging.WARNING) # Get imagenet datasets train_dataset, test_dataset = data_imagenet(nb_epochs, batch_size, imagenet_path) # Create initializable iterators train_iterator = train_dataset.make_initializable_iterator() test_iterator = test_dataset.make_initializable_iterator() # Getting next elements from the iterators next_test_element = test_iterator.get_next() next_train_element = train_iterator.get_next() train_x, train_y = train_iterator.get_next() test_x, test_y = test_iterator.get_next() # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) phase = tf.placeholder(tf.bool, name="phase") logits_scalar = tf.placeholder_with_default( INIT_T, shape=(), name="logits_temperature") if ensembleThree: if (model_path1 is None or model_path2 is None or model_path3 is None): train_from_scratch = True else: train_from_scratch = False elif model_path is not None: if os.path.exists(model_path): # check for existing model in immediate subfolder if any(f.endswith('.meta') for f in os.listdir(model_path)): train_from_scratch = False else: model_path = build_model_save_path( model_path, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay) print(model_path) save = True train_from_scratch = True else: train_from_scratch = True # train from scratch, but don't save since no path given if ensembleThree: if (wbitsList is None) or (abitsList is None): # Layer-wise separate quantization not specified for the first model if (wbits==0) or (abits==0): print("Error: the number of bits for constant precision weights and activations across layers for the first model have to be specified using the wbits and abits flags") sys.exit(1) else: fixedPrec1 = 1 elif (len(wbitsList) != 6) 
or (len(abitsList) != 6): print("Error: Need to specify the precisions for activations and weights for at least the four convolutional layers of AlexNet (excluding the first layer) and the two fully connected layers (excluding the last layer) of the first model") sys.exit(1) else: fixedPrec1 = 0 if (wbits2List is None) or (abits2List is None): # Layer-wise separate quantization not specified for the second model if (wbits2==0) or (abits2==0): print("Error: the number of bits for constant precision weights and activations across layers for the second model have to be specified using the wbits2 and abits2 flags") sys.exit(1) else: fixedPrec2 = 1 elif (len(wbits2List) != 6) or (len(abits2List) != 6): print("Error: Need to specify the precisions for activations and weights for at least the four convolutional layers of AlexNet (excluding the first layer) and the two fully connected layers (excluding the last layer) of the second model") sys.exit(1) else: fixedPrec2 = 0 if (fixedPrec2 != 1) or (fixedPrec1 != 1): # At least one of the models has separate precisions per layer fixedPrec=0 print("At least one model has separate precisions") if (fixedPrec1 == 1): # first model has fixed precision abitsList = (abits, abits, abits, abits, abits, abits) wbitsList = (wbits, wbits, wbits, wbits, wbits, wbits) if (fixedPrec2 == 1): # second model has fixed precision abits2List = (abits2, abits2, abits2, abits2, abits2, abits2) wbits2List = (wbits2, wbits2, wbits2, wbits2, wbits2, wbits2) else: fixedPrec=1 if (train_from_scratch): print("The ensemble model cannot be trained from scratch") sys.exit(1) if fixedPrec == 1: from cleverhans_tutorials.tutorial_models import make_ensemble_three_alexnet model = make_ensemble_three_alexnet( phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbits, abits, wbits2, abits2, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) else: from cleverhans_tutorials.tutorial_models import make_layerwise_three_combined_alexnet model = make_layerwise_three_combined_alexnet( phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbitsList, abitsList, wbits2List, abits2List, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) elif lowprecision: if (wbitsList is None) or (abitsList is None): # Layer-wise separate quantization not specified if (wbits==0) or (abits==0): print("Error: the number of bits for constant precision weights and activations across layers have to be specified using the wbits and abits flags") sys.exit(1) else: fixedPrec = 1 elif (len(wbitsList) != 6) or (len(abitsList) != 6): print("Error: Need to specify the precisions for activations and weights for at least the four convolutional layers of AlexNet (excluding the first layer) and the two fully connected layers (excluding the last layer)") sys.exit(1) else: fixedPrec = 0 if fixedPrec: ### For training from scratch from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_alexnet model = make_basic_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbits, abits, input_shape=( None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) else: from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_alexnet model = make_layerwise_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbitsList, abitsList, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) else: ### For training from scratch from cleverhans_tutorials.tutorial_models import make_basic_alexnet_from_scratch model = 
make_basic_alexnet_from_scratch(phase, logits_scalar, 'fp_', input_shape=( None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) # separate calling function for ensemble models if ensembleThree: preds = model.ensemble_call(x, reuse=False) else: ##default preds = model(x, reuse=False) print("Defined TensorFlow model graph.") rng = np.random.RandomState([2017, 8, 30]) def evaluate(): # Evaluate the accuracy of the CIFAR10 model on legitimate test # examples eval_params = {'batch_size': batch_size} if ensembleThree: acc = model_eval_ensemble_imagenet( sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params) else: #default below acc = model_eval_imagenet( sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an Imagenet model train_params = { 'lowprecision': lowprecision, 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'loss_name': 'train loss', 'filename': 'model', 'reuse_global_step': False, 'train_scope': 'train', 'is_training': True } if adv != 0: if adv == ADVERSARIAL_TRAINING_MADRYETAL: from cleverhans.attacks import MadryEtAl train_attack_params = {'eps': MAX_EPS, 'eps_iter': 0.01, 'nb_iter': nb_iter} train_attacker = MadryEtAl(model, sess=sess) elif adv == ADVERSARIAL_TRAINING_FGSM: from cleverhans.attacks import FastGradientMethod stddev = int(np.ceil((MAX_EPS * 255) // 2)) train_attack_params = {'eps': tf.abs(tf.truncated_normal( shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))} train_attacker = FastGradientMethod(model, back='tf', sess=sess) # create the adversarial trainer train_attack_params.update({'clip_min': 0., 'clip_max': 1.}) adv_x_train = train_attacker.generate(x, phase, **train_attack_params) preds_adv_train = model.get_probs(adv_x_train) eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.} adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params) preds_adv_eval = model.get_probs(adv_x_eval) # * logits_scalar # if adv: # from cleverhans.attacks import FastGradientMethod # fgsm = FastGradientMethod(model, back='tf', sess=sess) # fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.} # adv_x_train = fgsm.generate(x, phase, **fgsm_params) # preds_adv = model.get_probs(adv_x_train) if train_from_scratch: if save: train_params.update({'log_dir': model_path}) if adv and delay > 0: train_params.update({'nb_epochs': delay}) # do clean training for 'nb_epochs' or 'delay' epochs with learning rate reducing with time model_train_imagenet2(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase, evaluate=evaluate, args=train_params, save=save, rng=rng) # optionally do additional adversarial training if adv: print("Adversarial training for %d epochs" % (nb_epochs - delay)) train_params.update({'nb_epochs': nb_epochs - delay}) train_params.update({'reuse_global_step': True}) model_train_imagenet(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase, predictions_adv=preds_adv_train, evaluate=evaluate, args=train_params, save=save, rng=rng) else: if ensembleThree: ## ensembleThree models have to loaded from different paths variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) # First 11 variables from path1 stored_variables = ['lp_conv1_init/k', 'lp_conv1_init/b', 'lp_conv2_init/k', 'lp_conv3_init/k', 'lp_conv4_init/k', 'lp_conv5_init/k', 'lp_ip1init/W', 'lp_ip1init/b', 'lp_ip2init/W', 'lp_logits_init/W', 'lp_logits_init/b'] variable_dict = 
dict(OrderedDict(zip(stored_variables, variables[:11]))) # only dict was messing with the order # Restore the first set of variables from model_path1 saver = tf.train.Saver(variable_dict) saver.restore(sess, tf.train.latest_checkpoint(model_path1)) # Restore the second set of variables from model_path2 # Second 11 variables from path2 variable_dict = dict(OrderedDict(zip(stored_variables, variables[11:22]))) saver2 = tf.train.Saver(variable_dict) saver2.restore(sess, tf.train.latest_checkpoint(model_path2)) # Third 11 variables from path3 stored_variables = ['fp_conv1_init/k', 'fp_conv1_init/b', 'fp_conv2_init/k', 'fp_conv3_init/k', 'fp_conv4_init/k', 'fp_conv5_init/k', 'fp_ip1init/W', 'fp_ip1init/b', 'fp_ip2init/W', 'fp_logits_init/W', 'fp_logits_init/b'] variable_dict = dict(OrderedDict(zip(stored_variables, variables[22:33]))) saver3 = tf.train.Saver(variable_dict) saver3.restore(sess, tf.train.latest_checkpoint(model_path3)) # Next 24 batch norm variables from path1 stored_variables = ['lp__batchNorm1/batch_normalization/gamma', 'lp__batchNorm1/batch_normalization/beta', 'lp__batchNorm1/batch_normalization/moving_mean', 'lp__batchNorm1/batch_normalization/moving_variance', 'lp__batchNorm2/batch_normalization/gamma', 'lp__batchNorm2/batch_normalization/beta', 'lp__batchNorm2/batch_normalization/moving_mean', 'lp__batchNorm2/batch_normalization/moving_variance', 'lp__batchNorm3/batch_normalization/gamma', 'lp__batchNorm3/batch_normalization/beta', 'lp__batchNorm3/batch_normalization/moving_mean', 'lp__batchNorm3/batch_normalization/moving_variance', 'lp__batchNorm4/batch_normalization/gamma', 'lp__batchNorm4/batch_normalization/beta', 'lp__batchNorm4/batch_normalization/moving_mean', 'lp__batchNorm4/batch_normalization/moving_variance', 'lp__batchNorm5/batch_normalization/gamma', 'lp__batchNorm5/batch_normalization/beta', 'lp__batchNorm5/batch_normalization/moving_mean', 'lp__batchNorm5/batch_normalization/moving_variance', 'lp__batchNorm6/batch_normalization/gamma', 'lp__batchNorm6/batch_normalization/beta', 'lp__batchNorm6/batch_normalization/moving_mean', 'lp__batchNorm6/batch_normalization/moving_variance'] variable_dict = dict(OrderedDict(zip(stored_variables, variables[33:57]))) saver = tf.train.Saver(variable_dict) saver.restore(sess, tf.train.latest_checkpoint(model_path1)) # Next 24 batch norm variables from path2 variable_dict = dict(OrderedDict(zip(stored_variables, variables[57:81]))) saver = tf.train.Saver(variable_dict) saver.restore(sess, tf.train.latest_checkpoint(model_path2)) # Final 24 batch norm variables from path1 stored_variables = ['fp__batchNorm1/batch_normalization/gamma', 'fp__batchNorm1/batch_normalization/beta', 'fp__batchNorm1/batch_normalization/moving_mean', 'fp__batchNorm1/batch_normalization/moving_variance', 'fp__batchNorm2/batch_normalization/gamma', 'fp__batchNorm2/batch_normalization/beta', 'fp__batchNorm2/batch_normalization/moving_mean', 'fp__batchNorm2/batch_normalization/moving_variance', 'fp__batchNorm3/batch_normalization/gamma', 'fp__batchNorm3/batch_normalization/beta', 'fp__batchNorm3/batch_normalization/moving_mean', 'fp__batchNorm3/batch_normalization/moving_variance', 'fp__batchNorm4/batch_normalization/gamma', 'fp__batchNorm4/batch_normalization/beta', 'fp__batchNorm4/batch_normalization/moving_mean', 'fp__batchNorm4/batch_normalization/moving_variance', 'fp__batchNorm5/batch_normalization/gamma', 'fp__batchNorm5/batch_normalization/beta', 'fp__batchNorm5/batch_normalization/moving_mean', 
'fp__batchNorm5/batch_normalization/moving_variance', 'fp__batchNorm6/batch_normalization/gamma', 'fp__batchNorm6/batch_normalization/beta', 'fp__batchNorm6/batch_normalization/moving_mean', 'fp__batchNorm6/batch_normalization/moving_variance'] variable_dict = dict(OrderedDict(zip(stored_variables, variables[81:105]))) saver = tf.train.Saver(variable_dict) saver.restore(sess, tf.train.latest_checkpoint(model_path3)) else: # restoring the model trained using this setup, not a downloaded one tf_model_load(sess, model_path) print('Restored model from %s' % model_path) # evaluate() # Evaluate the accuracy of the model on legitimate test examples eval_params = {'batch_size': batch_size} if ensembleThree: accuracy = model_eval_ensemble_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params) else: #default below accuracy = model_eval_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params) print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) ########################################################################### # Build dataset ########################################################################### adv_inputs = test_x #adversarial inputs can be generated from any of the test examples ########################################################################### # Craft adversarial examples using generic approach ########################################################################### nb_adv_per_sample = 1 adv_ys = None yname = "y" print('Crafting adversarial examples') print("This could take some time ...") if ensembleThree: model_type = 'ensembleThree' else: model_type = 'default' if attack == ATTACK_CARLINI_WAGNER_L2: from cleverhans.attacks import CarliniWagnerL2 attacker = CarliniWagnerL2(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes) attack_params = {'binary_search_steps': 1, 'max_iterations': attack_iterations, 'learning_rate': 0.1, 'batch_size': batch_size, 'initial_const': 10, } elif attack == ATTACK_JSMA: from cleverhans.attacks import SaliencyMapMethod attacker = SaliencyMapMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes) attack_params = {'theta': 1., 'gamma': 0.1} elif attack == ATTACK_FGSM: from cleverhans.attacks import FastGradientMethod attacker = FastGradientMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes) attack_params = {'eps': eps} elif attack == ATTACK_MADRYETAL: from cleverhans.attacks import MadryEtAl attacker = MadryEtAl(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes) attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter} elif attack == ATTACK_BASICITER: print('Attack: BasicIterativeMethod') from cleverhans.attacks import BasicIterativeMethod attacker = BasicIterativeMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes) attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter} else: print("Attack undefined") sys.exit(1) attack_params.update({'clip_min': -2.2, 'clip_max': 2.7}) # Since max and min for imagenet turns out to be around -2.11 and 2.12 eval_params = {'batch_size': batch_size} ''' adv_x = attacker.generate(x, phase, **attack_params) # Craft adversarial examples using Fast Gradient Sign Method (FGSM) eval_params = {'batch_size': batch_size} X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={ phase: False}, args=eval_params) ''' print("Evaluating 
un-targeted results") if ensembleThree: adv_accuracy = model_eval_ensemble_adv_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params) else: adv_accuracy = model_eval_adv_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params) # Compute the number of adversarial examples that were successfully found print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy)) # Close TF session sess.close()
def main(argv):
    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if model_file is None:
        print('No model found')
        sys.exit()

    cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)
    nb_classes = 10
    X_test = cifar.eval_data.xs
    Y_test = to_categorical(cifar.eval_data.ys, nb_classes)
    assert Y_test.shape[1] == 10.

    set_log_level(logging.DEBUG)

    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        from madry_cifar10_model import make_madry_wresnet
        model = make_madry_wresnet()

        saver = tf.train.Saver()
        # Restore the checkpoint
        saver.restore(sess, model_file)

        nb_samples = FLAGS.nb_samples

        attack_params = {'batch_size': FLAGS.batch_size,
                         'clip_min': 0., 'clip_max': 255.}

        if FLAGS.attack_type == 'cwl2':
            from cleverhans.attacks import CarliniWagnerL2
            attacker = CarliniWagnerL2(model, sess=sess)
            attack_params.update({'binary_search_steps': 1,
                                  'max_iterations': 100,
                                  'learning_rate': 0.1,
                                  'initial_const': 10,
                                  'batch_size': 10})
        else:  # eps and eps_iter are on the 0-255 scale
            attack_params.update({'eps': 8, 'ord': np.inf})
            if FLAGS.attack_type == 'fgsm':
                from cleverhans.attacks import FastGradientMethod
                attacker = FastGradientMethod(model, sess=sess)
            elif FLAGS.attack_type == 'pgd':
                attack_params.update({'eps_iter': 2, 'nb_iter': 20})
                from cleverhans.attacks import MadryEtAl
                attacker = MadryEtAl(model, sess=sess)

        eval_par = {'batch_size': FLAGS.batch_size}

        if FLAGS.sweep:
            max_eps = 16
            epsilons = np.linspace(1, max_eps, max_eps)
            for e in epsilons:
                t1 = time.time()
                attack_params.update({'eps': e})
                x_adv = attacker.generate(x, **attack_params)
                preds_adv = model.get_probs(x_adv)
                acc = model_eval(sess, x, y, preds_adv, X_test[:nb_samples],
                                 Y_test[:nb_samples], args=eval_par)
                print('Epsilon %.2f, accuracy on adversarial' % e,
                      'examples %0.4f\n' % acc)
                t2 = time.time()
        else:
            t1 = time.time()
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            acc = model_eval(sess, x, y, preds_adv, X_test[:nb_samples],
                             Y_test[:nb_samples], args=eval_par)
            t2 = time.time()
            print('Test accuracy on adversarial examples %0.4f\n' % acc)
        print("Took", t2 - t1, "seconds")
def main(argv):
    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if model_file is None:
        print("No model found")
        sys.exit()

    cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)
    nb_classes = 10
    X_test = cifar.eval_data.xs
    Y_test = to_categorical(cifar.eval_data.ys, nb_classes)
    assert Y_test.shape[1] == 10.0

    set_log_level(logging.DEBUG)

    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        from cleverhans.model_zoo.madry_lab_challenges.cifar10_model import make_wresnet
        model = make_wresnet()

        saver = tf.train.Saver()
        # Restore the checkpoint
        saver.restore(sess, model_file)

        nb_samples = FLAGS.nb_samples

        attack_params = {
            "batch_size": FLAGS.batch_size,
            "clip_min": 0.0,
            "clip_max": 255.0,
        }

        if FLAGS.attack_type == "cwl2":
            from cleverhans.attacks import CarliniWagnerL2

            attacker = CarliniWagnerL2(model, sess=sess)
            attack_params.update(
                {
                    "binary_search_steps": 1,
                    "max_iterations": 100,
                    "learning_rate": 0.1,
                    "initial_const": 10,
                    "batch_size": 10,
                }
            )
        else:  # eps and eps_iter are on the 0-255 scale
            attack_params.update({"eps": 8, "ord": np.inf})
            if FLAGS.attack_type == "fgsm":
                from cleverhans.attacks import FastGradientMethod

                attacker = FastGradientMethod(model, sess=sess)
            elif FLAGS.attack_type == "pgd":
                attack_params.update({"eps_iter": 2, "nb_iter": 20})
                from cleverhans.attacks import MadryEtAl

                attacker = MadryEtAl(model, sess=sess)

        eval_par = {"batch_size": FLAGS.batch_size}

        if FLAGS.sweep:
            max_eps = 16
            epsilons = np.linspace(1, max_eps, max_eps)
            for e in epsilons:
                t1 = time.time()
                attack_params.update({"eps": e})
                x_adv = attacker.generate(x, **attack_params)
                preds_adv = model.get_probs(x_adv)
                acc = model_eval(
                    sess, x, y, preds_adv,
                    X_test[:nb_samples], Y_test[:nb_samples],
                    args=eval_par,
                )
                print(
                    "Epsilon %.2f, accuracy on adversarial" % e,
                    "examples %0.4f\n" % acc,
                )
                t2 = time.time()
        else:
            t1 = time.time()
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            acc = model_eval(
                sess, x, y, preds_adv,
                X_test[:nb_samples], Y_test[:nb_samples],
                args=eval_par,
            )
            t2 = time.time()
            print("Test accuracy on adversarial examples %0.4f\n" % acc)
        print("Took", t2 - t1, "seconds")
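# The two mains above attack the Madry CIFAR-10 checkpoint, which consumes raw
# 0-255 pixels, so eps=8 and eps_iter=2 are on that scale. For reference, a
# hedged sketch of the equivalent PGD parameters for a model trained on
# [0, 1]-scaled inputs (assumed conversion: divide by 255); this dict is
# illustrative and not used by the scripts above.
import numpy as np

pgd_params_unit_scale = {
    'eps': 8. / 255,       # same L-inf budget, rescaled
    'eps_iter': 2. / 255,  # same step size, rescaled
    'nb_iter': 20,
    'ord': np.inf,
    'clip_min': 0.,
    'clip_max': 1.,
}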
def train_zero_knowledge_gandef_model(train_start=0, train_end=60000,
                                      test_start=0, test_end=10000,
                                      smoke_test=True, save=False,
                                      testing=False,
                                      backprop_through_attack=False,
                                      num_threads=None):
    """
    Fashion-MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param smoke_test: if true, train and evaluate on a small data subset
    :param save: if true, the final model will be saved
    :param testing: if true, complete an AccuracyReport for unit tests to
                    verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training
    :param num_threads: number of threads for the TF session
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get Fashion-MNIST data
    X_train, Y_train, X_test, Y_test = data_fashion_mnist()
    if smoke_test:
        X_train, Y_train = X_train[:256], Y_train[:256]
        X_test, Y_test = X_test[:256], Y_test[:256]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y_soft = tf.placeholder(tf.float32, shape=(None, 10))

    # Prepare optimizers
    learning_rate = 1e-4
    clf_opt = tf.train.AdamOptimizer(learning_rate)
    dic_opt = tf.train.AdamOptimizer(learning_rate * 10)

    # Training parameters
    train_params = {
        'nb_epochs': 80,
        'batch_size': 128,
        'trade_off': 2,
        'inner_epochs': 1
    }
    rng = np.random.RandomState([2017, 8, 30])

    # Adversarial training
    print("Start adversarial training")
    zero_knowledge_gandef_model = make_zero_knowledge_gandef_model(
        name="model_zero_knowledge_gandef")
    aug_x = gaussian_augment(x, std=1)
    preds_clean = zero_knowledge_gandef_model(x)
    preds_aug = zero_knowledge_gandef_model(aug_x)

    def cross_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross-entropy loss
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take the average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    def sigmoid_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate sigmoid cross-entropy loss
        out = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take the average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    # Perform and evaluate adversarial training
    gan_train_v2(sess, x, y_soft, preds_clean, X_train, Y_train,
                 loss_func=[cross_entropy, sigmoid_entropy],
                 optimizer=[clf_opt, dic_opt],
                 predictions_adv=preds_aug,
                 evaluate=None,
                 args=train_params,
                 rng=rng,
                 var_list=zero_knowledge_gandef_model.get_gan_params())

    # Evaluate the accuracy of the model on clean examples
    preds_clean = zero_knowledge_gandef_model(x)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': True,
        'reject_threshold': 0.5
    }
    clean_acc = confident_model_eval(sess, x, y_soft, preds_clean,
                                     X_test, Y_test, args=eval_params)
    print('Test accuracy on clean test examples: %0.4f\n' % clean_acc)
    report.adv_train_clean_eval = clean_acc

    # Evaluate the accuracy of the model on FGSM examples
    fgsm_params = {'eps': 0.6, 'clip_min': -1., 'clip_max': 1.}
    fgsm_att = FastGradientMethod(zero_knowledge_gandef_model, sess=sess)
    fgsm_adv = fgsm_att.generate(x, **fgsm_params)
    preds_fgsm_adv = zero_knowledge_gandef_model(fgsm_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    fgsm_acc = confident_model_eval(sess, x, y_soft, preds_fgsm_adv,
                                    X_test, Y_test, args=eval_params)
    print('Test accuracy on FGSM test examples: %0.4f\n' % fgsm_acc)
    report.adv_train_adv_eval = fgsm_acc

    # Evaluate the accuracy of the model on BIM examples
    bim_params = {'eps': 0.6, 'eps_iter': 0.1,
                  'clip_min': -1., 'clip_max': 1.}
    bim_att = BasicIterativeMethod(zero_knowledge_gandef_model, sess=sess)
    bim_adv = bim_att.generate(x, **bim_params)
    preds_bim_adv = zero_knowledge_gandef_model(bim_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    bim_acc = confident_model_eval(sess, x, y_soft, preds_bim_adv,
                                   X_test, Y_test, args=eval_params)
    print('Test accuracy on BIM test examples: %0.4f\n' % bim_acc)
    report.adv_train_adv_eval = bim_acc

    # Evaluate the accuracy of the model on PGD examples
    pgd_params = {
        'eps': 0.6,
        'eps_iter': 0.02,
        'nb_iter': 40,
        'clip_min': -1.,
        'clip_max': 1.,
        'rand_init': True
    }
    pgd_att = MadryEtAl(zero_knowledge_gandef_model, sess=sess)
    pgd_adv = pgd_att.generate(x, **pgd_params)
    preds_pgd_adv = zero_knowledge_gandef_model(pgd_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    pgd_acc = confident_model_eval(sess, x, y_soft, preds_pgd_adv,
                                   X_test, Y_test, args=eval_params)
    print('Test accuracy on PGD test examples: %0.4f\n' % pgd_acc)
    report.adv_train_adv_eval = pgd_acc

    # Save model
    if save:
        model_path = "models/zero_knowledge_gandef"
        vars_to_save = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope='model_zero_knowledge_gandef*')
        assert len(vars_to_save) > 0
        saver = tf.train.Saver(var_list=vars_to_save)
        saver.save(sess, model_path)
        print('Model saved\n')
    else:
        print('Model not saved\n')
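# Both loss helpers above recover logits by peeling a trailing Softmax op off
# the predictions tensor. A minimal sketch of that pattern in isolation
# (assumes TF1 graph mode, where preds.op exposes the producing operation;
# the helper name is illustrative):

def get_logits_from_probs(preds):
    op = preds.op
    if op.type == "Softmax":
        logits, = op.inputs  # the sole input of a Softmax op is its logits
    else:
        logits = preds  # already logits; use as-is
    return logits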
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE, architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL, ckpt_dir='None',
                     learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training
    :param label_smoothing: float, amount of label smoothing for cross-entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')

    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        if x_train.shape[1] == 3:
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
        if len(y_train.shape) == 1:
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  # 1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }

    # The Madry model doesn't divide by 255
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train),
                                           np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))
    print('clip_min : {}, clip_max : {} >> CHECK WITH WHICH VALUES THE '
          'CLASSIFIER WAS PRETRAINED !!! <<'.format(pgd_params['clip_min'],
                                                    pgd_params['clip_max']))

    rng = np.random.RandomState()  # [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds, x_set, y_set, report_key, is_adv=None, predictor=None,
                x_adv=None):
        if predictor is None:
            acc = model_eval(sess, x, y, preds, x_set, y_set,
                             args=eval_params)
        else:
            # First report the uncorrected accuracy, then the predictor's
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()
            # if is_adv:
            #     import IPython; IPython.embed(); exit(1)

        if FLAGS.save_debug_dict:
            debug_dict['x_set'] = x_set
            debug_dict['y_set'] = y_set
            ddfn = 'logs/debug_dict_{}.pkl'.format(
                'adv' if is_adv else 'clean')
            if not os.path.exists(ddfn):
                with open(ddfn, 'wb') as f:
                    pickle.dump(debug_dict, f)
            debug_dict.clear()

        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
            if is_adv is not None:
                label = 'test_acc_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(label, acc)
                if predictor is not None:
                    detect = np.equal(p_det, is_adv).mean()
                    label = 'test_det_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    print(label, detect)
                    swriter.add_scalar(label, detect)
                    label = 'test_dac_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    swriter.add_scalar(
                        label,
                        np.equal(p_set,
                                 y_set[:len(p_set)].argmax(-1))[np.equal(
                                     p_det, is_adv)].mean())
        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify a valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            if ckpt_dir != 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

            saver = tf.train.Saver(var_list=dict(
                (v.name.split('/', 1)[1].split(':')[0], v)
                for v in tf.global_variables()))
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

            initialize_uninitialized_global_variables(sess)
        else:
            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval',
                        False)

            train(sess, loss, None, None,
                  dataset_train=dataset_train, dataset_size=dataset_size,
                  evaluate=evaluate, args=train_params, rng=rng,
                  var_list=model.get_params())

        # Walk back from the predictions to the final MatMul to expose the
        # latent features and the classifier weights
        logits_op = preds.op
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect], y_train[:FLAGS.n_collect], x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:
                def _predictor():
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
                next(predictor)
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds, x_test, y_test,
                                'clean_train_clean_eval', False,
                                predictor=predictor)

        # Initialize the PGD attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps

            def _attack_mean(x):
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            adv_x = tf.reduce_mean(adv_x, 1)

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            for ds, y, name in ((x_train, y_train, 'train'),
                                (x_test, y_test, 'test')):
                train_batches = math.ceil(len(ds) / FLAGS.batch_size)
                train_pgd = np.concatenate([
                    sess.run(adv_x, {
                        x: ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size]
                    }) for b in tqdm.trange(train_batches)
                ])
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), y)
                train_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), train_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv, x_test, y_test,
                              'clean_train_adv_eval', True,
                              predictor=predictor, x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds, pgd_test, y_test,
                              'clean_train_adv_eval', True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')
        exit(0)

    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2', nb_classes, nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        model2 = ResNet()
    else:
        raise Exception('Specify a valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir != 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
        assert ckpt_path and tf_model_load(
            sess, file_path=ckpt_path), '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

        initialize_uninitialized_global_variables(sess)
    else:
        def evaluate2():
            # Accuracy of the adversarially trained model on legitimate
            # test inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess, loss2, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate2, args=train_params, rng=rng,
              var_list=model2.get_params())

        # Evaluate model
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
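# The 'mean' attack above re-projects each noisy PGD result into the eps-ball
# around the original image before averaging. A hedged restatement of that
# projection step as a standalone helper (name and defaults are illustrative,
# mirroring the clipping order in _attack_mean):
import tensorflow as tf

def project_to_eps_ball(x_pgd, x_orig, eps, pixel_min=0., pixel_max=255.):
    # First clamp into [x_orig - eps, x_orig + eps], then into the valid
    # pixel range.
    x_clip = tf.minimum(x_pgd, x_orig + eps)
    x_clip = tf.maximum(x_clip, x_orig - eps)
    return tf.clip_by_value(x_clip, pixel_min, pixel_max)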
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):
    size = 256
    eval_params = {'batch_size': 128}

    ############################ Prepare the Data #############################
    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32
    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)
    ############################ Prepare the Data #############################

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []

        def modelBuilder(x, num_classes, dataset, type, sess, input_output):
            if len(input_output) == 0:
                reuse = False
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type),
                        reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type),
                        reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            else:
                reuse = True
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(num_classes, inputT=x,
                                                      reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(num_classes, inputT=x,
                                                 reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            return tf_model.logits

        # create an attackable model for cleverhans
        model = CallableModelWrapper(
            lambda placeholder: modelBuilder(placeholder, num_classes,
                                             dataset, type, sess,
                                             input_output), 'logits')

        # TODO: check the configurations
        if attack_type == "FGM":
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "CWL2":
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {'confidence': 0.9, 'batch_size': 128,
                      'learning_rate': 0.005}
        elif attack_type == "DF":
            attack = DeepFool(model, back='tf', sess=sess)
            params = {}
        elif attack_type == "ENM":
            # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {'confidence': 0.9, 'batch_size': 128,
                      'learning_rate': 0.005}
        elif attack_type == "FFA":
            # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'eps_iter': 0.005,
                      'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        else:
            raise Exception(
                "Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # "adv_vals" may contain NaNs where the attack failed, and some
        # inputs may be left entirely unperturbed for the same reason
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for NaN
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            if np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleaning
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial examples were generated!")
            return

        print("{} out of {} adversarial examples were generated.".format(
            len(adv_vals_cleaned), size))
        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx]))
                     for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals
        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              x_test[:size], y_test[:size], args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)
        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              adv_vals_cleaned, y_cleaned, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
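# The NaN/no-perturbation clean-up above can be factored into a reusable
# helper; this is a hedged sketch with an illustrative name, not part of the
# original script. Attacks can fail silently, leaving NaNs or returning the
# input unchanged, and both cases should be dropped before measuring accuracy.
import numpy as np

def filter_failed_attacks(adv, clean, labels):
    keep = [i for i, a in enumerate(adv)
            if not np.isnan(a).any() and not np.array_equiv(a, clean[i])]
    return adv[keep], clean[keep], labels[keep]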
class TestMadryEtAl(CleverHansTest):
    def setUp(self):
        super(TestMadryEtAl, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = MadryEtAl(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not using clip_min and
        clip_max), this attack fails with
        np.mean(orig_labels == new_labels) == .5.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.05,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_clip_eta(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.1,
                                        nb_iter=5)

        delta = np.max(np.abs(x_adv - x_val), axis=1)
        self.assertTrue(np.all(delta <= 1.))

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.1,
                                        nb_iter=5, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)

    def test_multiple_initial_random_step(self):
        """
        This test generates multiple adversarial examples until an
        adversarial example is generated with a different label compared to
        the original label. This is the procedure suggested in Madry et al.
        (2017). This test will fail if an initial random step is not taken
        (error > 0.5).
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs_multi = orig_labs.copy()

        # Generate multiple adversarial examples
        for i in range(10):
            x_adv = self.attack.generate_np(x_val, eps=.5, eps_iter=0.05,
                                            clip_min=0.5, clip_max=0.7,
                                            nb_iter=2)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

            # Examples for which we have not yet found adversarial examples
            I = (orig_labs == new_labs_multi)
            new_labs_multi[I] = new_labs[I]

        self.assertTrue(np.mean(orig_labs == new_labs_multi) < 0.1)
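# Hedged usage sketch mirroring the tests above (assumes SimpleModel and
# MadryEtAl are importable as in this test module, and TF1 graph mode):
# generate_np builds the attack graph on first use and runs it in the
# attack's session. The batch below is random illustrative data.
import numpy as np
import tensorflow as tf

x_batch = np.random.rand(8, 2).astype(np.float32)
attack = MadryEtAl(SimpleModel(), sess=tf.Session())
x_adv = attack.generate_np(x_batch, eps=0.3, eps_iter=0.05, nb_iter=10,
                           clip_min=0., clip_max=1.)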