def main(_):
    """Run MIM on every input batch and store clean/adversarial image pairs.

    A fresh graph and session are created, ``InceptionV3Model`` is built in
    that session, and ``MIM.generate`` produces the adversarial tensor for the
    input placeholder.  Each batch yielded by ``load_images`` is attacked; the
    clean and adversarial images are copied one by one into two preallocated
    arrays that are finally passed to ``save_npy``.

    NOTE(review): this block was recovered from whitespace-collapsed source,
    so the nesting of the progress print and of the early-stop check below is
    an assumption -- confirm against the original layout.
    """
    batch_size = FLAGS.batch_size  # NOTE(review): never read again in this function
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        with tf.Session() as sess:
            model = InceptionV3Model(sess=sess)
            model._build()
            mim = MIM(model, back='tf', sess=None)
            mim_params = {'eps_iter': 0.06, 'eps': 0.3, 'nb_iter': 10, 'ord': 2, 'decay_factor': 1.0}
            x_adv = mim.generate(x_input, **mim_params)
            j = 0  # number of image pairs stored so far
            # Output buffers: room for 10000 images of 299x299x3 each.
            z_samples = np.zeros((10000, 299, 299, 3))
            real_samples = np.zeros((10000, 299, 299, 3))
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                # Store each clean image next to its adversarial counterpart.
                for (real, adv) in zip(images, adv_images):
                    z_samples[j] = adv
                    real_samples[j] = real
                    #show(real, adv, model, sess)
                    j += 1
                # Progress report whenever the count is a multiple of 100.
                if not (j % 100):
                    print j
                # Stop once 5000 pairs are stored; the rest of the buffers stays zero.
                if j >= 5000:
                    print "Max examples exceeded, early stopping"
                    break
            save_npy(real_samples, z_samples)
def main(_):
    """Run the targeted momentum-iterative attack over the input directory.

    Builds the Inception classifier together with a one-hot target
    placeholder, creates the adversarial tensor with
    ``MomentumIterativeMethod``, restores the model variables from
    ``FLAGS.checkpoint_path`` and writes the adversarial version of every
    loaded batch to ``FLAGS.output_dir``.
    """
    input_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    class_count = FLAGS.num_classes
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Graph inputs: the image batch and one integer target label per image.
        images_ph = tf.placeholder(tf.float32, shape=input_shape)
        targets_ph = tf.placeholder(tf.int32, shape=[FLAGS.batch_size])
        targets_one_hot = tf.one_hot(targets_ph, class_count)
        classifier = InceptionModel(class_count)

        # Run computation
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=True)
        with tf.Session(config=session_config) as sess:
            attack = MomentumIterativeMethod(classifier, sess=sess)
            adv_tensor = attack.generate(
                images_ph,
                eps=32.0 / 255.0,
                eps_iter=0.01,
                clip_min=-1.0,
                clip_max=1.0,
                nb_iter=20,
                decay_factor=1.0,
                y_target=targets_one_hot)

            # The saver is created after the attack graph exists, then the
            # model variables are loaded from the checkpoint.
            restorer = tf.train.Saver(slim.get_model_variables())
            restorer.restore(sess, FLAGS.checkpoint_path)

            for names, batch, target_labels in load_images(FLAGS.input_dir,
                                                           input_shape):
                feed = {images_ph: batch, targets_ph: target_labels}
                adv_batch = sess.run(adv_tensor, feed_dict=feed)
                save_images(adv_batch, names, FLAGS.output_dir)
def main(_):
    """Run the targeted momentum-iterative attack on GPU 0.

    Same pipeline as a plain targeted MIM run: build the Inception classifier
    and a one-hot target placeholder, create the adversarial tensor, restore
    the model variables from ``FLAGS.checkpoint_path`` and save the
    adversarial version of every loaded batch to ``FLAGS.output_dir``.  The
    session is configured with a GPU memory fraction of 0.6 and on-demand
    allocation.
    """
    input_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    class_count = FLAGS.num_classes
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Graph inputs: the image batch and one integer target label per image.
        images_ph = tf.placeholder(tf.float32, shape=input_shape)
        targets_ph = tf.placeholder(tf.int32, shape=[FLAGS.batch_size])
        targets_one_hot = tf.one_hot(targets_ph, class_count)
        # Alternative classifiers left by the author: ResNetModel, VGGModel.
        classifier = InceptionModel(class_count)

        # Run computation
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # expose only the first GPU
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        # Use at most 60% of the memory of the visible GPU ...
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.6
        # ... and request that memory on demand.
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config) as sess:
            attack = MomentumIterativeMethod(classifier, sess=sess)
            adv_tensor = attack.generate(
                images_ph,
                eps=32.0 / 255.0,
                eps_iter=0.01,
                clip_min=-1.0,
                clip_max=1.0,
                nb_iter=20,
                decay_factor=1.0,
                y_target=targets_one_hot)

            restorer = tf.train.Saver(slim.get_model_variables())
            restorer.restore(sess, FLAGS.checkpoint_path)

            for names, batch, target_labels in load_images(FLAGS.input_dir,
                                                           input_shape):
                feed = {images_ph: batch, targets_ph: target_labels}
                adv_batch = sess.run(adv_tensor, feed_dict=feed)
                save_images(adv_batch, names, FLAGS.output_dir)
def main(_):
    """Build the Inception + MIM attack graph, then exit.

    The function constructs the input placeholder, the ``InceptionModel`` and
    the MIM adversarial tensor and then calls ``sys.exit(0)``.

    NOTE(review): everything after the first ``sys.exit(0)`` (checkpoint
    restore, meta-graph export, the attack loop and ``save_npy``) is
    unreachable as written.  It looks like leftover debugging; confirm which
    stage is meant to run before removing either the exit or the dead code.
    NOTE(review): nesting was reconstructed from whitespace-collapsed source.
    """
    batch_size = FLAGS.batch_size  # NOTE(review): not read again below
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    targeted = False  # NOTE(review): not read again below
    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)
        with tf.Session() as sess:
            # Calling the model adds its forward pass to the graph; the
            # returned tensor itself is not used afterwards.
            predictions = model(x_input)
            mim = MIM(model, back='tf', sess=None)
            mim_params = {
                'eps_iter': 0.06,
                'eps': 0.3,
                'nb_iter': 10,
                'ord': 2,
                'decay_factor': 1.0
            }
            x_adv = mim.generate(x_input, **mim_params)
            # Terminates the process here; nothing below this line executes.
            sys.exit(0)
            saver = tf.train.Saver(slim.get_model_variables())
            saver.restore(sess, FLAGS.checkpoint_path)
            z_samples = np.zeros((10000, 299, 299, 3))
            real_samples = np.zeros((10000, 299, 299, 3))
            # Export the graph definition and the restored weights.
            meta_graph_def = tf.train.export_meta_graph(
                filename='tmp/imagenet/inception_v3.meta')
            saver.save(sess, 'tmp/imagenet/inception_v3.ckpt')
            #freeze_graph(sess)
            sys.exit(0)
            """ a = [n.name for n in tf.get_default_graph().as_graph_def().node] for item in a: print item """
            j = 0  # number of image pairs stored so far
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                for (real, adv) in zip(images, adv_images):
                    z_samples[j] = adv
                    real_samples[j] = real
                    j += 1
                if not (j % 100):
                    print j
                if j >= 5000:
                    print "Max examples exceeded, early stopping"
                    break
            save_npy(real_samples, z_samples)
def setUp(self):
    """Create a session, a tiny fixed-weight model and the attack under test."""
    super(TestMomentumIterativeMethod, self).setUp()
    import tensorflow as tf

    def my_model(x):
        """The world's simplest neural network: sigmoid layer, then softmax."""
        hidden_weights = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
        hidden = tf.nn.sigmoid(tf.matmul(x, hidden_weights))
        output_weights = tf.constant([[-2.4, 1.2], [0.5, -2.3]],
                                     dtype=tf.float32)
        return tf.nn.softmax(tf.matmul(hidden, output_weights))

    session = tf.Session()
    self.sess = session
    self.model = my_model
    self.attack = MomentumIterativeMethod(my_model, sess=session)
def mim(X, which, prob, magn):
    """Return a copy of ``X`` with a random subset replaced by MIM examples.

    Each row is selected when a uniform draw falls below ``prob``.  Selected
    rows are attacked in chunks of ``CHILD_BATCH_SIZE`` with
    ``eps = 0.1 * magn`` against ``which.model`` and written back into the
    copy; the input array itself is left untouched.
    """
    attacker = MomentumIterativeMethod(KerasModelWrapper(which.model),
                                       sess=session)
    result = X.copy()
    draws = np.random.uniform(size=len(result))
    selected = np.where(draws < prob)[0]

    chunk_starts = range(0, len(selected), CHILD_BATCH_SIZE)
    for start in tqdm(chunk_starts, desc='batch: ', leave=False):
        rows = selected[start:start + CHILD_BATCH_SIZE]
        clean = tf.convert_to_tensor(result[rows])
        # NOTE(review): the initializer is re-run for every chunk, exactly as
        # in the original; confirm this does not reset the model's weights.
        session.run(tf.global_variables_initializer())
        adversarial = attacker.generate(clean, eps=0.1 * magn)
        result[rows] = session.run(adversarial)
    return result
def load_attack(sess, attack_method, model, targeted, adv_ys, eps, batch_size):
    """Create a cleverhans attack object together with its parameters.

    Args:
        sess: session handed to the attack constructor.
        attack_method: one of ``'fgsm'``, ``'pgd'``, ``'mim'`` or ``'cw'``.
        model: object exposing ``predict(x, softmax=...)``; the attack is
            given softmax outputs for fgsm/pgd/mim and raw outputs for cw.
        targeted, adv_ys, eps, batch_size: forwarded unchanged to the
            matching ``config_*`` helper.

    Returns:
        ``(attack, attack_params, yname)`` where the last two values are
        whatever the matching ``config_*`` helper returns.

    Raises:
        ValueError: if ``attack_method`` is not a supported name.  Previously
            this case fell through every branch and failed at ``return`` with
            an UnboundLocalError that did not mention the bad value.
    """
    # The cleverhans imports stay inside the branches so that only the
    # requested attack class is imported.
    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod as attack_cls
        configure, use_softmax = config_fgsm, True
    elif attack_method == 'pgd':
        from cleverhans.attacks import MadryEtAl as attack_cls
        configure, use_softmax = config_madry, True
    elif attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod as attack_cls
        configure, use_softmax = config_mim, True
    elif attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2 as attack_cls
        configure, use_softmax = config_cw, False
    else:
        raise ValueError(
            "Unknown attack_method %r; expected one of 'fgsm', 'pgd', 'mim', "
            "'cw'" % (attack_method,))

    def model_fn(x):
        """Callable given to cleverhans in place of a Model instance."""
        return model.predict(x, softmax=use_softmax)

    attack = attack_cls(model_fn, sess=sess)
    attack_params, yname = configure(targeted, adv_ys, eps, batch_size)
    return attack, attack_params, yname
class TestMomentumIterativeMethod(TestBasicIterativeMethod):
    """Runs the basic-iterative test-suite against MomentumIterativeMethod."""

    def setUp(self):
        """Create a session, a tiny fixed-weight model and the attack."""
        super(TestMomentumIterativeMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.nn.softmax(tf.matmul(h1, W2))
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = MomentumIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_can_be_called_with_different_decay_factor(self):
        """The max-norm perturbation must reach eps for every decay factor."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        for decay_factor in [0.0, 0.5, 1.0]:
            # The keyword used to be misspelled ``dacay_factor``, so the value
            # under test was never passed to the attack as ``decay_factor``.
            x_adv = self.attack.generate_np(x_val, eps=0.5, ord=np.inf,
                                            decay_factor=decay_factor,
                                            clip_min=-5.0, clip_max=5.0)

            delta = np.max(np.abs(x_adv - x_val), axis=1)
            self.assertClose(delta, 0.5)
def main(_): # Images for inception classifier are normalized to be in [-1, 1] interval, # eps is a difference between pixels so it should be in [0, 2] interval. # Renormalizing epsilon from [0, 255] to [0, 2]. batch_size = FLAGS.batch_size batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] num_classes = 1001 targeted = False tf.logging.set_verbosity(tf.logging.DEBUG) with tf.Graph().as_default(): # Prepare graph x_input = tf.placeholder(tf.float32, shape=batch_shape) model = InceptionModel(num_classes) with tf.Session() as sess: mim = MIM(model, back='tf', sess=None) mim_params = { 'eps_iter': 0.06, 'eps': 0.3, 'nb_iter': 10, 'ord': 2, 'decay_factor': 1.0 } x_adv = mim.generate(x_input, **mim_params) saver = tf.train.Saver(slim.get_model_variables()) session_creator = tf.train.ChiefSessionCreator( scaffold=tf.train.Scaffold(saver=saver), checkpoint_filename_with_path=FLAGS.checkpoint_path, master=FLAGS.master) saver.restore(sess, FLAGS.checkpoint_path) sess.run(tf.global_variables_initializer()) # with tf.train.MonitoredSession(session_creator=session_creator) as sess: i = 0 for filenames, images in load_images(FLAGS.input_dir, batch_shape): adv_images = sess.run(x_adv, feed_dict={x_input: images}) print "input images: ", images.shape #adv_images = cw.generate_np(images, **cw_params) i += 16 print i # print filenames # print adv_images.shape # adv_images = cw.generate_np( save_images(adv_images, filenames, FLAGS.output_dir)
def main(_):
    """Attack a weighted ensemble of Inception-family models with MIM.

    The perturbation budget comes from ``FLAGS.max_epsilon`` rescaled by
    ``2 / 255``.  When ``FLAGS.target`` is non-negative the attack is
    targeted at that class for every image; otherwise no target is passed.
    Adversarial images are written to ``FLAGS.output_dir``.
    """
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    input_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        images_ph = tf.placeholder(tf.float32, shape=input_shape)

        # Single-model alternatives left by the author: InceptionV3,
        # EnsAdvInceptionResNetV2, NasNetLarge.
        members = [
            mymodels.InceptionV3(num_classes),
            mymodels.Ens3AdvInceptionV3(num_classes),
            mymodels.Ens4AdvInceptionV3(num_classes),
            mymodels.EnsAdvInceptionResNetV2(num_classes),
            mymodels.AdvInceptionV3(num_classes),
        ]
        ensemble = mymodels.EnsembleModel(num_classes, members,
                                          weight=[4.0, 1.0, 1.0, 1.0, 4.0])

        # Untargeted unless a non-negative class index was requested.
        target = None
        if not FLAGS.target < 0:
            target_ids = tf.constant(
                np.zeros([FLAGS.batch_size]) + FLAGS.target, tf.int32)
            target = tf.one_hot(target_ids, num_classes)

        with tf.Session() as sess:
            # FastGradientMethod(model) is the commented-out alternative.
            attack = MomentumIterativeMethod(ensemble)
            adv_tensor = attack.generate(
                images_ph,
                eps=eps,
                eps_iter=eps / 5.0,
                clip_min=-1.,
                clip_max=1.,
                y_target=target,
                nb_iter=80)
            ensemble.restore(sess)
            for names, batch in load_images(FLAGS.input_dir, input_shape):
                adv_batch = sess.run(adv_tensor, feed_dict={images_ph: batch})
                save_images(adv_batch, names, FLAGS.output_dir)
def mim(model):
    """Return a function that overwrites a batch with MIM adversarial examples.

    The returned ``attack(X, eps)`` walks over ``X`` in chunks of
    ``CHILD_BATCH_SIZE``, attacks each chunk with step size ``eps * 0.2`` and
    stores the result back into ``X``.  It modifies ``X`` in place and
    returns ``None``.
    """
    attacker = MomentumIterativeMethod(KerasModelWrapper(model), sess=session)

    def attack(X, eps):
        chunk_starts = range(0, len(X), CHILD_BATCH_SIZE)
        for start in tqdm(chunk_starts, desc='MIM: ', file=sys.stdout,
                          leave=False):
            window = slice(start, start + CHILD_BATCH_SIZE)
            clean = tf.convert_to_tensor(X[window])
            adversarial = attacker.generate(clean, eps=eps, eps_iter=eps * 0.2)
            X[window] = session.run(adversarial)

    return attack
def main(_):
    """Attack every input batch with MIM inside a monitored session.

    The graph holds an ``InceptionModel`` and the MIM adversarial tensor.  A
    ``ChiefSessionCreator`` backed by a ``Saver`` over the slim model
    variables loads ``FLAGS.checkpoint_path``; every batch from
    ``load_images`` is then attacked and saved to ``FLAGS.output_dir``.
    """
    # Images for the inception classifier lie in [-1, 1], so a pixel
    # difference lies in [0, 2]; this rescales epsilon from [0, 255].
    # NOTE(review): the value is not passed to the attack below, which uses
    # its own fixed ``eps`` -- confirm which one is intended.
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    input_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        images_ph = tf.placeholder(tf.float32, shape=input_shape)
        classifier = InceptionModel(num_classes)
        attack = MIM(classifier, back='tf', sess=None)
        adv_tensor = attack.generate(
            images_ph,
            eps_iter=0.06,
            eps=0.3,
            nb_iter=10,
            ord=2,
            decay_factor=1.0)

        # Run computation
        restorer = tf.train.Saver(slim.get_model_variables())
        scaffold = tf.train.Scaffold(saver=restorer)
        creator = tf.train.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(session_creator=creator) as sess:
            for names, batch in load_images(FLAGS.input_dir, input_shape):
                adv_batch = sess.run(adv_tensor, feed_dict={images_ph: batch})
                save_images(adv_batch, names, FLAGS.output_dir)
class TestMomentumIterativeMethod(TestBasicIterativeMethod):
    """Runs the basic-iterative test-suite against MomentumIterativeMethod."""

    def setUp(self):
        """Create a session, a ``SimpleModel`` and the attack under test."""
        super(TestMomentumIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = MomentumIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_can_be_called_with_different_decay_factor(self):
        """The max-norm perturbation must reach eps for every decay factor."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        for decay_factor in [0.0, 0.5, 1.0]:
            # The keyword used to be misspelled ``dacay_factor``, so the value
            # under test was never passed to the attack as ``decay_factor``.
            x_adv = self.attack.generate_np(x_val, eps=0.5, ord=np.inf,
                                            decay_factor=decay_factor,
                                            clip_min=-5.0, clip_max=5.0)

            delta = np.max(np.abs(x_adv - x_val), axis=1)
            self.assertClose(delta, 0.5)
def train(cifar10_data, logfile):
    """Train CIFAR-10 for a number of steps.

    The function, in order:
      1. declares the network parameters and builds the noisy inference graph
         and its loss;
      2. builds a gradient-descent update whose per-variable gradients are
         clipped (kernels only) and perturbed with Gaussian noise;
      3. creates cleverhans attack tensors for the attacks switched on in
         ``attack_switch``;
      4. runs a warm-up phase with zero noise to estimate the noise
         redistribution vector, re-initialises the variables and trains while
         tracking the privacy spending;
      5. evaluates accuracy and certified robustness under each enabled attack
         and writes one summary line to ``logfile``.

    Many values (``fgsm_eps``, ``target_eps``, ``image_size``, ``hk``, ``lr``,
    ``clip_bound``, ``sensitivity``, ``batch_size``, ``D``, ``epochs``,
    ``dp_epsilon``, ``attack_norm_bound``, ``sigma_value``, ``delta``,
    ``path`` ...) are read from the enclosing module.

    Args:
        cifar10_data: object exposing ``train.next_batch(n)`` and
            ``test.next_batch(n)``; element 0 of a batch is fed as images and
            element 1 as labels.
        logfile: writable file-like object receiving the header and the
            result line.

    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    source; in particular the placement of the test phase after (not inside)
    the training loop should be confirmed.
    """
    logfile.write("fgsm_eps \t %g, epsilon \t %d \n" %
                  (fgsm_eps, target_eps[0]))
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1',
            shape=[3, 3, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases1 = cifar10._variable_on_cpu('biases1', [128],
                                           tf.constant_initializer(0.0))
        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2',
            shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases2 = cifar10._variable_on_cpu('biases2', [128],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3',
            shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases3 = cifar10._variable_on_cpu('biases3', [256],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local4') as scope:
        kernel4 = cifar10._variable_with_weight_decay(
            'kernel4',
            shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04,
            wd=0.004)
        biases4 = cifar10._variable_on_cpu('biases4', [hk],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local5') as scope:
        kernel5 = cifar10._variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
            math.ceil(5 / 2),
            wd=0.0)
        biases5 = cifar10._variable_on_cpu('biases5', [10],
                                           tf.constant_initializer(0.1))

        scale2 = tf.Variable(tf.ones([hk]))
        beta2 = tf.Variable(tf.zeros([hk]))

        # Everything the inference function needs, in a fixed order.
        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5, scale2, beta2
        ]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        # Largest singular value of the first kernel, flattened to 2-D.
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivityW = tf.reduce_max(sing_vals)
        dp_delta = 0.05
        #dp_mult = attack_norm_bound * math.sqrt(2 * math.log(1.25 / dp_delta)) / dp_epsilon
        # NOTE(review): this placeholder is rebound two statements below and
        # is never fed -- presumably a leftover; confirm before removing.
        noise = tf.placeholder(tf.float32, [None, 28, 28, 32])

        dp_mult = attack_norm_bound * math.sqrt(
            2 * math.log(1.25 / dp_delta)) / dp_epsilon
        noise = tf.placeholder(tf.float32, [None, 14, 14, 128])
        sigma = tf.placeholder(tf.float32)
        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        #y_conv, h_conv1 = inference(x, params, dp_mult**2 * noise);
        y_conv, h_conv1 = inference(x, params, attack_norm_bound * noise)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        #logits = inference(images)

        # Calculate loss. Apply Taylor Expansion for the output layer
        loss = cifar10.lossDPSGD(y_conv, y_)

        # noise redistribution #
        # Sign of the loss gradient w.r.t. the first hidden layer, averaged
        # over the batch, squared, and rescaled so that it sums to 14*14*128.
        grad, = tf.gradients(loss, h_conv1)
        normalized_grad = tf.sign(grad)
        normalized_grad = tf.stop_gradient(normalized_grad)
        normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad,
                                                  axis=(0)))**2
        sum_r = tf.reduce_sum(normalized_grad_r,
                              axis=(0, 1, 2),
                              keepdims=False)
        normalized_grad_r = 14 * 14 * 128 * normalized_grad_r / sum_r
        print(normalized_grad_r)

        # Largest singular value of the redistribution tensor, flattened.
        shape_grad = normalized_grad_r.get_shape().as_list()
        grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]])
        g = tf.transpose(grad_t)
        sing_g_vals = tf.svd(g, compute_uv=False)
        sensitivity_2 = tf.reduce_max(sing_g_vals)
        ########################

        opt = tf.train.GradientDescentOptimizer(lr)

        # One gradient tensor per trainable parameter.
        gw_K1 = tf.gradients(loss, kernel1)[0]
        gb1 = tf.gradients(loss, biases1)[0]

        gw_K2 = tf.gradients(loss, kernel2)[0]
        gb2 = tf.gradients(loss, biases2)[0]

        gw_K3 = tf.gradients(loss, kernel3)[0]
        gb3 = tf.gradients(loss, biases3)[0]

        gw_K4 = tf.gradients(loss, kernel4)[0]
        gb4 = tf.gradients(loss, biases4)[0]

        gw_K5 = tf.gradients(loss, kernel5)[0]
        gb5 = tf.gradients(loss, biases5)[0]

        #clip gradient
        # Only the kernel gradients are clipped; bias gradients are not.
        gw_K1 = tf.clip_by_norm(gw_K1, clip_bound)
        gw_K2 = tf.clip_by_norm(gw_K2, clip_bound)
        gw_K3 = tf.clip_by_norm(gw_K3, clip_bound)
        gw_K4 = tf.clip_by_norm(gw_K4, clip_bound)
        gw_K5 = tf.clip_by_norm(gw_K5, clip_bound)

        #perturb
        # NOTE(review): ``sensitivity`` is not defined in this function
        # (``sensitivityW`` is) -- presumably a module-level value; confirm.
        gw_K1 += tf.random_normal(shape=tf.shape(gw_K1),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K2 += tf.random_normal(shape=tf.shape(gw_K2),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K3 += tf.random_normal(shape=tf.shape(gw_K3),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K4 += tf.random_normal(shape=tf.shape(gw_K4),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K5 += tf.random_normal(shape=tf.shape(gw_K5),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gb1 += tf.random_normal(shape=tf.shape(gb1),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb2 += tf.random_normal(shape=tf.shape(gb2),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb3 += tf.random_normal(shape=tf.shape(gb3),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb4 += tf.random_normal(shape=tf.shape(gb4),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb5 += tf.random_normal(shape=tf.shape(gb5),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size

        # apply gradients and keep tracking moving average of the parameters
        apply_gradient_op = opt.apply_gradients([(gw_K1, kernel1),
                                                 (gb1, biases1),
                                                 (gw_K2, kernel2),
                                                 (gb2, biases2),
                                                 (gw_K3, kernel3),
                                                 (gb3, biases3),
                                                 (gw_K4, kernel4),
                                                 (gb4, biases4),
                                                 (gw_K5, kernel5),
                                                 (gb5, biases5)],
                                                global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        # ``train_op`` does nothing itself; running it forces both the
        # parameter update and the moving-average update.
        with tf.control_dependencies(
            [apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #train_op = cifar10.trainDPSGD(loss, global_step, clip_bound, sigma, sensitivity)

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        # Which attacks get an attack tensor and are evaluated below.
        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size)

        # define each attack method's tensor
        attack_tensor_dict = {}
        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                                eps=fgsm_eps,
                                                clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                                  eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0,
                                              clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x,
                                                  eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry
        #====================== attack =========================

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Privacy accountant
        priv_accountant = accountant.GaussianMomentsAccountant(D)
        privacy_accum_op = priv_accountant.accumulate_privacy_spending(
            [None, None], sigma, batch_size)

        # Build the summary operation based on the TF collection of Summaries.
        #summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + path, sess.graph)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            # The step number is the text after the last '-' in the file name.
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / batch_size)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(
            D / batch_size))  #number of steps for one epoch

        # Standard deviation of the first noise term, from dp_epsilon.
        s = math.log(sqrt(2.0 / math.pi) * 1e+5)
        sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / (
            2.0 * dp_epsilon)
        #print(sigmaEGM)
        __noiseE = np.random.normal(0.0, sigmaEGM,
                                    14 * 14 * 128).astype(np.float32)
        __noiseE = np.reshape(__noiseE, [-1, 14, 14, 128])
        print("Compute The Noise Redistribution Vector")
        # Warm-up phase: both the injected noise and sigma are fed as zero.
        for step in xrange(_global_step, 100 * step_for_epoch):
            batch = cifar10_data.train.next_batch(
                batch_size)  #Get a random batch.
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    noise: __noiseE * 0,
                    sigma: sigma_value * 0
                })
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
        # Estimate the redistribution vector and its largest singular value
        # on one batch 40 times the training batch size.
        batch = cifar10_data.train.next_batch(40 * batch_size)
        grad_redis = sess.run([normalized_grad_r],
                              feed_dict={
                                  x: batch[0],
                                  y_: batch[1],
                                  noise: __noiseE * 0
                              })
        _sensitivity_2 = sess.run([sensitivity_2],
                                  feed_dict={
                                      x: batch[0],
                                      y_: batch[1],
                                      noise: __noiseE * 0
                                  })
        #print(_sensitivity_2)

        _sensitivityW = sess.run(sensitivityW)
        #print(_sensitivityW)
        Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0])
        #print(Delta_redis)
        sigmaHGM = sqrt(2.0) * Delta_redis * (
            sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
        #print(sigmaHGM)
        __noiseH = np.random.normal(0.0, sigmaHGM,
                                    14 * 14 * 128).astype(np.float32)
        __noiseH = np.reshape(__noiseH, [-1, 14, 14, 128]) * grad_redis

        # Re-initialise every variable before the real training run.
        # NOTE(review): this also discards the warm-up updates and any
        # checkpoint restored above -- confirm that is intended.
        sess.run(init)
        print("Training")
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            batch = cifar10_data.train.next_batch(
                batch_size)  #Get a random batch.
            #grad_redis = sess.run([normalized_grad_r], feed_dict = {x: batch[0], y_: batch[1], noise: (__noise + grad_redis)/2})
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    noise: (__noiseE + __noiseH) / 2,
                    sigma: sigma_value
                })
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            sess.run([privacy_accum_op])
            spent_eps_deltas = priv_accountant.get_privacy_spent(
                sess, target_eps=target_eps)
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
                print(spent_eps_deltas)
            # Stop training as soon as any reported delta reaches ``delta``.
            _break = False
            for _eps, _delta in spent_eps_deltas:
                if _delta >= delta:
                    _break = True
                    break
            if _break == True:
                break

        ## Robustness
        print("Testing")
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        test_bach_size = 5000
        for atk in attack_switch.keys():
            # -1 marks "not evaluated" for attacks that are switched off.
            if atk not in adv_acc_dict:
                adv_acc_dict[atk] = -1
                robust_adv_acc_dict[atk] = -1
                robust_adv_utility_dict[atk] = -1
            if attack_switch[atk]:
                test_bach = cifar10_data.test.next_batch(test_bach_size)
                adv_images_dict = sess.run(attack_tensor_dict[atk],
                                           feed_dict={x: test_bach[0]})
                ### PixelDP Robustness ###
                # Per-sample vote counts over the 10 classes.
                predictions_form_argmax = np.zeros([test_bach_size, 10])
                softmax_predictions = sess.run(softmax_y_conv,
                                               feed_dict={
                                                   x: adv_images_dict,
                                                   noise:
                                                   (__noiseE + __noiseH) / 2
                                               })
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
                for n_draws in range(0, 1000):
                    # Fresh noise for this draw, added to the training noise.
                    _noiseE = np.random.normal(
                        0.0, sigmaEGM, 14 * 14 * 128).astype(np.float32)
                    _noiseE = np.reshape(_noiseE, [-1, 14, 14, 128])
                    _noise = np.random.normal(
                        0.0, sigmaHGM, 14 * 14 * 128).astype(np.float32)
                    _noise = np.reshape(_noise,
                                        [-1, 14, 14, 128]) * grad_redis
                    # Count the votes of the previous prediction, then
                    # predict again with the new noise.
                    for j in range(test_bach_size):
                        pred = argmax_predictions[j]
                        predictions_form_argmax[j, pred] += 1
                    softmax_predictions = sess.run(
                        softmax_y_conv,
                        feed_dict={
                            x:
                            adv_images_dict,
                            noise: (__noiseE + __noiseH) / 2 +
                            (_noiseE + _noise) / 4
                        })
                    argmax_predictions = np.argmax(softmax_predictions,
                                                   axis=1)
                final_predictions = predictions_form_argmax
                is_correct = []
                is_robust = []
                for j in range(test_bach_size):
                    is_correct.append(
                        np.argmax(test_bach[1][j]) == np.argmax(
                            final_predictions[j]))
                    robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                        counts=predictions_form_argmax[j],
                        eta=0.05,
                        dp_attack_size=fgsm_eps,
                        dp_epsilon=dp_epsilon,
                        dp_delta=0.05,
                        dp_mechanism='gaussian') / dp_mult
                    is_robust.append(robustness_from_argmax >= fgsm_eps)
                adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_bach_size
                # NOTE(review): divides by the number of robust samples,
                # which can be zero -- confirm how that case should be
                # reported.
                robust_adv_acc_dict[atk] = np.sum([
                    a and b for a, b in zip(is_robust, is_correct)
                ]) * 1.0 / np.sum(is_robust)
                robust_adv_utility_dict[atk] = np.sum(
                    is_robust) * 1.0 / test_bach_size
                ##############################
        # One line with, per enabled attack: accuracy, accuracy among robust
        # samples, robust fraction, and the product of the last two.
        log_str = ""
        for atk in attack_switch.keys():
            if attack_switch[atk]:
                # added robust prediction
                log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
        print(log_str)
        logfile.write(log_str + '\n')
def train_child(t, p, m, load_dict=False):
    """Train an attack-augmented model and a plain baseline for one pass.

    Two ``TestCNN`` instances are created on GPU 0.  ``model`` is trained on
    the training data after the attack policy ``(t, p, m)`` has been applied
    to it; ``raw_model`` is trained on the unmodified ``trainloader``.  Both
    are evaluated on ``valloader`` and on a copy of ``val_x`` perturbed by the
    cleverhans ``Noise`` attack with ``eps=0.3``.  The four accuracies are
    appended to ``black_eval_runs/acc.csv`` and printed, and the model and
    optimizer states are saved under ``black_eval_runs/``.

    Fixes relative to the previous version:
      * the adversarial loader was built from ``trainset`` instead of the
        freshly created ``adv_trainset``, so ``model`` never saw an attacked
        sample;
      * attack outputs were written into boolean-mask copies and never reached
        ``train_x_adv``; samples are now addressed by integer index so the
        results are stored in the returned array;
      * the batch loop reused the subpolicy loop variable ``i``.

    Args:
        t: per subpolicy, per operation, a one-element sequence holding the
            attack index (0 = FGSM, 1 = PGD, 2 = MIM).
        p: same layout as ``t``; the value a random draw is compared with to
            decide whether a sample is attacked.
        m: same layout as ``t``; magnitude level, mapped linearly onto the
            eps range ``[0.03, 0.3]`` through ``(m + 1) / MAGN_COUNT``.
        load_dict: when true, load model and optimizer states from
            ``black_eval_runs/`` before training.

    Returns:
        None.
    """
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)

    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))

    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)

    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    noise_raw = Noise(cleverhans_raw_model, sess=session)

    # Each *_op builds the attack tensor for ``x_op`` and evaluates it on the
    # numpy batch ``x``.
    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def noise_raw_op(x, eps):
        att = noise_raw.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        """Return a copy of ``x`` with the attack policy applied."""
        attacks = [fgsm_op, pgd_op, mim_op]
        attacks_name = ['FGSM', 'PGD', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        # Every sample is assigned to exactly one subpolicy at random.
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m),
                     total=len(t),
                     desc='Subpolicy: ',
                     leave=False)):
            # Integer positions rather than a boolean-mask copy, so attack
            # results can be written back into ``train_x_adv``.
            subpolicy_idx = np.flatnonzero(adv_type == i)
            for tj, pj, mj in tqdm(zip(ti, pi, mi),
                                   total=len(ti),
                                   desc='Operation: ',
                                   leave=False):
                tj, pj, mj = (*tj, *pj, *mj)
                # NOTE(review): the draw is standard-normal (``randn``), not
                # uniform -- confirm that ``pj`` is meant as such a threshold
                # rather than a probability.
                chosen = subpolicy_idx[
                    np.random.randn(len(subpolicy_idx)) < pj]
                magnitude = ((mj + 1) / MAGN_COUNT *
                             (eps[tj][1] - eps[tj][0]) + eps[tj][0])
                for start in tqdm(range(0, len(chosen), BATCH_SIZE),
                                  desc=attacks_name[tj] + ': ',
                                  leave=False):
                    batch_idx = chosen[start:start + BATCH_SIZE]
                    train_x_adv[batch_idx] = attacks[tj](
                        train_x_adv[batch_idx], magnitude)
        return train_x_adv

    def _train_epoch(net, net_optimizer, loader, tag):
        """Run one optimisation pass of ``net`` over ``loader``."""
        net.train()
        batch_tqdm = tqdm(loader, leave=False)
        for x, y in batch_tqdm:
            net_optimizer.zero_grad()
            output = net(x.cuda(0))
            loss = criterion(output, y.cuda(0))
            loss.backward()
            net_optimizer.step()
            acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
            batch_tqdm.set_description(f'{tag} {loss:.3f} {acc:.3f}')

    def _accuracy(net, loader):
        """Return correct predictions over ``loader`` divided by ``len(val_x)``."""
        tot_acc = 0
        for x, y in tqdm(loader, leave=False):
            output = net(x.cuda(0))
            tot_acc += float(torch.sum(output.cpu().argmax(axis=1) == y))
        return tot_acc / len(val_x)

    def _noisy_val_loader(noise_fn):
        """Return a loader over ``val_x`` perturbed batch-wise by ``noise_fn``."""
        val_x_adv = np.zeros_like(val_x)
        for start in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                          desc='Noise: ',
                          leave=False):
            val_x_adv[start:start + BATCH_SIZE] = noise_fn(
                val_x[start:start + BATCH_SIZE], 0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        return torch.utils.data.DataLoader(adv_valset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=False,
                                           num_workers=4)

    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    raw_optimizer = optim.SGD(raw_model.parameters(), lr=1e-3)

    train_x_adv = attack_train(train_x)
    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    adv_trainloader = torch.utils.data.DataLoader(adv_trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=4)

    # NOTE(review): saved states are loaded only after the training attacks
    # were generated, i.e. those attacks ran against freshly constructed
    # weights -- confirm this ordering is intended.
    if load_dict:
        model.load_state_dict(torch.load('black_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('black_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('black_eval_runs/raw_model.pt'))
        raw_optimizer.load_state_dict(
            torch.load('black_eval_runs/raw_optimizer.pt'))

    _train_epoch(model, optimizer, adv_trainloader, 'adv')
    _train_epoch(raw_model, raw_optimizer, trainloader, 'raw')

    with torch.no_grad():
        model.eval()
        adv_raw_acc = _accuracy(model, valloader)
        adv_adv_acc = _accuracy(model, _noisy_val_loader(noise_op))

        raw_model.eval()
        raw_raw_acc = _accuracy(raw_model, valloader)
        raw_adv_acc = _accuracy(raw_model, _noisy_val_loader(noise_raw_op))

    with open('black_eval_runs/acc.csv', 'a') as f:
        f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n')
    print(
        f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}'
    )

    torch.save(model.state_dict(), 'black_eval_runs/model.pt')
    torch.save(optimizer.state_dict(), 'black_eval_runs/optimizer.pt')
    torch.save(raw_model.state_dict(), 'black_eval_runs/raw_model.pt')
    torch.save(raw_optimizer.state_dict(), 'black_eval_runs/raw_optimizer.pt')
def _average_tower_gradients(collection_key, tower_grads):
    """Average, per variable, the gradients computed on every tower.

    Args:
        collection_key: graph collection holding the variables to average for.
        tower_grads: list (one entry per tower) of ``(grad, var)`` lists as
            returned by ``Optimizer.compute_gradients``.

    Returns:
        A list of ``(mean_grad, var)`` pairs, one per variable in the
        collection, in collection order.
    """
    var_by_name = {}
    grads_by_name = {}
    for var in tf.get_collection(collection_key):
        if var.name not in grads_by_name:
            grads_by_name[var.name] = []
            var_by_name[var.name] = var
    for tower in tower_grads:
        for grad, var in tower:
            grads_by_name[var.name].append(grad)

    averaged = []
    for var_name in grads_by_name:
        # expand dim 0, then concat on dim 0, then reduce mean on dim 0
        expanded = [tf.expand_dims(g, 0) for g in grads_by_name[var_name]]
        stacked = tf.concat(expanded, axis=0)
        mean_grad = tf.reduce_mean(stacked, 0)
        # rebuild (grad, var) list
        averaged.append((mean_grad, var_by_name[var_name]))
    return averaged


def _print_grads_and_vars(title, grads_and_vars):
    """Print the name and shape of every (grad, var) pair under a title."""
    print('*****************************')
    print(title)
    for grad, var in grads_and_vars:
        print('grads')
        print((grad.name, grad.shape))
        print('var')
        print((var.name, var.shape))
        print('------')


def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile,
          parameter_dict):
    """Train CIFAR-10 for a number of steps.

    Builds a multi-tower graph (one tower per entry of ``GPU_IDX``) with an
    auto-encoder pretraining loss and a Taylor-expansion classification loss,
    builds ifgsm / mim / madry attack tensors on the auxiliary GPUs, then
    alternates between generating adversarial samples and running one
    pretrain + train update per step.

    Args:
        cifar10_data: dataset object; ``cifar10_data.train`` must provide
            ``next_super_batch`` and ``next_super_batch_premix_ensemble``.
        epochs: number of epochs; the step count is
            ``ceil(D / L) * epochs + 1``.
        L: batch size handed to the encoder train ops and used to derive the
            number of steps per epoch.
        learning_rate: learning rate of both optimizers.
        scale3: not used by this function.
        Delta2: sensitivity value passed to the encoder train ops and used to
            scale the injected noise.
        epsilon2: privacy budget passed to the encoder train ops.
        eps2_ratio: not used by this function.
        alpha: forwarded to ``cifar10.TaylorExp``.
        perturbFM: multiplied with the last-layer kernel to form the
            perturbation passed to ``cifar10.TaylorExp``.
        fgsm_eps: logged, and used to compute ``delta_r``.
        total_eps: only logged.
        logfile: writable text file object for progress logging.
        parameter_dict: dict updated in place with the derived values
            (``dp_epsilon``, ``epsilon2_update``, ``delta_r``,
            ``_sensitivityW``, ``delta_h``, ``dp_mult``,
            ``iter_step_training``, ``step_for_epoch``, ``perturbH_test``,
            ``perturbFM_h``, ``Noise``, ``Noise_test``).

    Raises:
        AssertionError: if the averaged training loss becomes NaN.
    """
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    # make sure variables are placed on cpu
    # TODO: for AWS version, check if put variables on GPU will be better
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, trainable=False)
        attacks = ['ifgsm', 'mim', 'madry']

        # manually create all scopes
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope:
            scope_conv1 = scope
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope:
            scope_conv2 = scope
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope:
            scope_conv3 = scope
        with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope:
            scope_local4 = scope
        with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope:
            scope_local5 = scope

        # Parameter declarations
        with tf.variable_scope(scope_conv1) as scope:
            kernel1 = _variable_with_weight_decay(
                'kernel1',
                shape=[4, 4, 3, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[AECODER_VARIABLES])
            biases1 = _bias_on_cpu('biases1', [128],
                                   tf.constant_initializer(0.0),
                                   collect=[AECODER_VARIABLES])

            # Largest singular value of the flattened first-layer kernel;
            # it is evaluated once below to derive the noise scales.
            shape = kernel1.get_shape().as_list()
            w_t = tf.reshape(kernel1, [-1, shape[-1]])
            w = tf.transpose(w_t)
            sing_vals = tf.svd(w, compute_uv=False)
            sensitivity = tf.reduce_max(sing_vals)
            gamma = 2 * Delta2 / (L * sensitivity)

        with tf.variable_scope(scope_conv2) as scope:
            kernel2 = _variable_with_weight_decay(
                'kernel2',
                shape=[5, 5, 128, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases2 = _bias_on_cpu('biases2', [128],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_conv3) as scope:
            kernel3 = _variable_with_weight_decay(
                'kernel3',
                shape=[5, 5, 256, 256],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases3 = _bias_on_cpu('biases3', [256],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local4) as scope:
            kernel4 = _variable_with_weight_decay(
                'kernel4',
                shape=[int(image_size / 4)**2 * 256, hk],
                stddev=0.04,
                wd=0.004,
                collect=[CONV_VARIABLES])
            biases4 = _bias_on_cpu('biases4', [hk],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local5) as scope:
            kernel5 = _variable_with_weight_decay(
                'kernel5', [hk, 10],
                stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
                math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases5 = _bias_on_cpu('biases5', [10],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        # group these for use as parameters
        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        scopes = [
            scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5
        ]

        # placeholders for input values
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])  # one time
        noise = tf.placeholder(tf.float32,
                               [None, image_size, image_size, 3])  # one time
        adv_noise = tf.placeholder(
            tf.float32, [None, image_size, image_size, 3])  # one time

        # super batch of benign inputs, split into one batch per tower
        x_sb = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        x_list = tf.split(x_sb, N_GPUS, axis=0)

        adv_x_sb = tf.placeholder(tf.float32,
                                  [None, image_size, image_size, 3])
        adv_x_list = tf.split(adv_x_sb, N_GPUS, axis=0)

        x_test = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        # super batch of benign labels, split into one batch per tower
        y_sb = tf.placeholder(tf.float32, [None, 10])
        y_list = tf.split(y_sb, N_GPUS, axis=0)

        # super batch of adversarial labels (re-ordered below)
        adv_y_sb = tf.placeholder(tf.float32, [None, 10])

        y_test = tf.placeholder(tf.float32, [None, 10])

        # re-arrange the input samples: for tower i, concatenate the label
        # slices of every attack so they line up with adv_super_batch_tensor
        _split_adv_y_sb = tf.split(adv_y_sb, N_AUX_GPUS, axis=0)
        reorder_adv_y_sb = []
        for i in range(N_GPUS):
            reorder_adv_y_sb.append(
                tf.concat([
                    _split_adv_y_sb[i + N_GPUS * atk_index]
                    for atk_index in range(len(attacks))
                ],
                          axis=0))

        tower_pretrain_grads = []
        tower_train_grads = []
        all_train_loss = []

        pretrain_opt = tf.train.AdamOptimizer(learning_rate)
        train_opt = tf.train.GradientDescentOptimizer(learning_rate)

        # batch index
        bi = 0
        for gpu in GPU_IDX:
            # putting ops on each tower (GPU)
            with tf.device('/gpu:{}'.format(gpu)):
                print('Train inference GPU placement')
                print('/gpu:{}'.format(gpu))

                # Auto-Encoder
                # pretrain_adv and pretrain_benign are cost tensors of the
                # encoding layer
                with tf.variable_scope(scope_conv1) as scope:
                    Enc_Layer2 = EncLayer(inpt=adv_x_list[bi],
                                          n_filter_in=3,
                                          n_filter_out=128,
                                          filter_size=3,
                                          W=kernel1,
                                          b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_adv = Enc_Layer2.get_train_ops2(
                        xShape=tf.shape(adv_x_list[bi])[0],
                        Delta=Delta2,
                        epsilon=epsilon2,
                        batch_size=L,
                        learning_rate=learning_rate,
                        W=kernel1,
                        b=biases1,
                        perturbFMx=adv_noise,
                        perturbFM_h=FM_h,
                        bn_index=bi)
                    Enc_Layer3 = EncLayer(inpt=x_list[bi],
                                          n_filter_in=3,
                                          n_filter_out=128,
                                          filter_size=3,
                                          W=kernel1,
                                          b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_benign = Enc_Layer3.get_train_ops2(
                        xShape=tf.shape(x_list[bi])[0],
                        Delta=Delta2,
                        epsilon=epsilon2,
                        batch_size=L,
                        learning_rate=learning_rate,
                        W=kernel1,
                        b=biases1,
                        perturbFMx=noise,
                        perturbFM_h=FM_h,
                        bn_index=bi)
                    pretrain_cost = pretrain_adv + pretrain_benign

                # benign conv output
                x_image = x_list[bi] + noise
                y_conv = inference(x_image,
                                   FM_h,
                                   params,
                                   scopes,
                                   training=True,
                                   bn_index=bi)

                # adv conv output
                adv_x_image = adv_x_list[bi] + adv_noise
                y_adv_conv = inference(adv_x_image,
                                       FM_h,
                                       params,
                                       scopes,
                                       training=True,
                                       bn_index=bi)

                # Calculate loss. Apply Taylor Expansion for the output layer
                perturbW = perturbFM * params[8]
                train_loss = cifar10.TaylorExp(y_conv, y_list[bi], y_adv_conv,
                                               reorder_adv_y_sb[bi], L, alpha,
                                               perturbW)
                all_train_loss.append(train_loss)

                # list of variables to train
                pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
                train_var_list = tf.get_collection(CONV_VARIABLES)

                # compute tower gradients
                pretrain_grads = pretrain_opt.compute_gradients(
                    pretrain_cost, var_list=pretrain_var_list)
                train_grads = train_opt.compute_gradients(
                    train_loss, var_list=train_var_list)
                # note these lists contain (grad, variable) pairs
                tower_pretrain_grads.append(pretrain_grads)
                tower_train_grads.append(train_grads)

            # batch index
            bi += 1

        # average the gradient from each tower
        avg_pretrain_grads = _average_tower_gradients(AECODER_VARIABLES,
                                                      tower_pretrain_grads)
        _print_grads_and_vars("avg_pretrain_grads:", avg_pretrain_grads)

        avg_train_grads = _average_tower_gradients(CONV_VARIABLES,
                                                   tower_train_grads)
        _print_grads_and_vars("avg_train_grads:", avg_train_grads)
        print('*****************************')

        # get averaged loss tensor
        avg_loss = tf.reduce_mean(tf.stack(all_train_loss), axis=0)

        # TODO: take the average of the bn variables from each tower/training GPU
        # currently, testing is using the bn variables on bn_index 0
        # (tower/training GPU 0)

        # build train op (apply average gradient to variables)
        # according to 1.13 doc, updates need to be manually applied
        _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print('update ops:')
        print(_update_ops)

        with tf.control_dependencies(_update_ops):
            # Both ops receive global_step, so it advances twice per
            # training step.
            pretrain_op = pretrain_opt.apply_gradients(avg_pretrain_grads,
                                                       global_step=global_step)
            train_op = train_opt.apply_gradients(avg_train_grads,
                                                 global_step=global_step)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # init kernel 1 and get some values from it
        # NOTE(review): kernel1 is initialised again by the global
        # initializer below (and possibly overwritten by a checkpoint), so
        # the sensitivity measured here may not match the kernel that is
        # actually trained - confirm this is intended.
        sess.run(kernel1.initializer)
        dp_epsilon = 0.005
        parameter_dict['dp_epsilon'] = dp_epsilon
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        parameter_dict['epsilon2_update'] = epsilon2_update
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        # NOTE: these values needs to be calculated in testing
        delta_r = fgsm_eps * (image_size**2)
        parameter_dict['delta_r'] = delta_r
        _sensitivityW = sess.run(sensitivity)
        parameter_dict['_sensitivityW'] = _sensitivityW
        delta_h = _sensitivityW * (14**2)
        parameter_dict['delta_h'] = delta_h
        dp_mult = (Delta2) / (L * epsilon2_update * (delta_h / 2 + delta_r))
        parameter_dict['dp_mult'] = dp_mult

        # place test-time inference into CPU
        with tf.device('/cpu:0'):
            # testing pipeline
            test_x_image = x_test + noise
            test_y_conv = inference(test_x_image,
                                    FM_h,
                                    params,
                                    scopes,
                                    training=True,
                                    bn_index=0)
            test_softmax_y_conv = tf.nn.softmax(test_y_conv)

        # ============== attacks ================
        iter_step_training = 3
        parameter_dict['iter_step_training'] = iter_step_training
        aux_dup_count = N_GPUS
        # split input x_super_batch into N_AUX_GPUS parts
        x_attacks = tf.split(x_sb, N_AUX_GPUS, axis=0)
        # split input x_test into aux_dup_count parts
        x_test_split = tf.split(x_test, aux_dup_count, axis=0)

        # setup all attacks
        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            scopes=scopes,
            image_size=image_size,
            adv_noise=adv_noise)
        attack_tensor_training_dict = {}
        attack_tensor_testing_dict = {}

        # attack strength, fed per step
        mu_alpha = tf.placeholder(tf.float32, [1])

        # build each attack; attack k uses auxiliary GPUs / input slices
        # [k * aux_dup_count, (k + 1) * aux_dup_count)
        for atk in attacks:
            print('building attack {} tensors'.format(atk))
            # for each gpu assign to each attack
            attack_tensor_training_dict[atk] = []
            attack_tensor_testing_dict[atk] = []
            for i in range(aux_dup_count):
                if atk == 'ifgsm':
                    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[i])):
                        print('ifgsm GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i]))
                        # ifgsm tensors for training
                        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                         sess=sess)
                        attack_tensor_training_dict[atk].append(
                            ifgsm_obj.generate(x=x_attacks[i],
                                               eps=mu_alpha,
                                               eps_iter=mu_alpha /
                                               iter_step_training,
                                               nb_iter=iter_step_training,
                                               clip_min=-1.0,
                                               clip_max=1.0))
                elif atk == 'mim':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count])):
                        print('mim GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count]))
                        # mim tensors for training
                        mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                          sess=sess)
                        attack_tensor_training_dict[atk].append(
                            mim_obj.generate(
                                x=x_attacks[i + 1 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                decay_factor=1.0,
                                clip_min=-1.0,
                                clip_max=1.0))
                elif atk == 'madry':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count])):
                        print('madry GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count]))
                        # madry tensors for training
                        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                        attack_tensor_training_dict[atk].append(
                            madry_obj.generate(
                                x=x_attacks[i + 2 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                clip_min=-1.0,
                                clip_max=1.0))

        # combine all attack tensors
        adv_concat_list = []
        for i in range(aux_dup_count):
            adv_concat_list.append(
                tf.concat(
                    [attack_tensor_training_dict[atk][i] for atk in attacks],
                    axis=0))
        # the tensor that contains each batch of adv samples for training
        # has same sample order as the labels
        adv_super_batch_tensor = tf.concat(adv_concat_list, axis=0)

        print('******************** debug info **********************')
        # list of variables to train
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        print('pretrain var list')
        for v in pretrain_var_list:
            print((v.name, v.shape))
        print('**********************************')
        train_var_list = tf.get_collection(CONV_VARIABLES)
        print('train var list')
        for v in train_var_list:
            print((v.name, v.shape))
        print('**********************************')

        # all variables
        print('all variables')
        vl = tf.global_variables()
        for v in vl:
            print((v.name, v.shape))
        print('**********************************')

        # all ops
        ops = [n.name for n in tf.get_default_graph().as_graph_def().node]
        print('total number of ops')
        print(len(ops))
        print('******************** debug info **********************')

        # Create a saver. tf.global_variables() replaces the deprecated
        # tf.all_variables().
        saver = tf.train.Saver(var_list=tf.global_variables(),
                               max_to_keep=1000)

        # Build and run the initialization operation
        # (tf.global_variables_initializer replaces the deprecated
        # tf.initialize_all_variables).
        init = tf.global_variables_initializer()
        sess.run(init)

        # load the most recent models
        # NOTE(review): checkpoints are read from FLAGS.checkpoint_dir but
        # written to os.getcwd() + dirCheckpoint below - confirm both point
        # at the same directory.
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps
        print('total number of steps: {}'.format(T))
        step_for_epoch = int(math.ceil(D / L))  # number of steps for one epoch
        parameter_dict['step_for_epoch'] = step_for_epoch
        print('step_for_epoch: {}'.format(step_for_epoch))

        # generate some fixed noise (drawn once, reused for every step)
        # scale 0 => perturbH_test is all zeros
        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])
        parameter_dict['perturbH_test'] = perturbH_test
        print('perturbH_test')
        print(perturbH_test.shape)

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])
        parameter_dict['perturbFM_h'] = perturbFM_h
        print('perturbFM_h')
        print(perturbFM_h.shape)

        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        parameter_dict['Noise'] = Noise
        Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update, L)
        parameter_dict['Noise_test'] = Noise_test
        print('Noise and Noise_test')
        print(Noise.shape)
        print(Noise_test.shape)

        # some timing variables
        adv_duration_total = 0.0
        adv_duration_count = 0
        train_duration_total = 0.0
        train_duration_count = 0

        # NOTE(review): this discards the step parsed from the restored
        # checkpoint, so step numbering always restarts at 0. Kept as in the
        # original; confirm whether resuming the counter was intended.
        _global_step = 0
        # range (not xrange): this body already relies on Python 3 only
        # features such as print(..., flush=True).
        for step in range(_global_step, _global_step + T):
            start_time = time.time()
            # TODO: fix this
            d_eps = random.random() * 0.5
            print('d_eps: {}'.format(d_eps))

            # version with 3 AUX GPU
            # get two super batchs, one for benign training, one for adv
            # training
            super_batch_images, super_batch_labels = \
                cifar10_data.train.next_super_batch(N_GPUS, random=True)
            super_batch_images_for_adv, super_batch_adv_labels = \
                cifar10_data.train.next_super_batch_premix_ensemble(
                    N_GPUS, random=True)

            # TODO: re-arrange the adv labels to match the adv samples

            # generate the adversarial samples for this step
            super_batch_adv_images = sess.run(adv_super_batch_tensor,
                                              feed_dict={
                                                  x_sb:
                                                  super_batch_images_for_adv,
                                                  adv_noise: Noise,
                                                  mu_alpha: [d_eps]
                                              })

            adv_finish_time = time.time()
            adv_duration = adv_finish_time - start_time
            adv_duration_total += adv_duration
            adv_duration_count += 1

            # one-off debug output on the first iteration only
            if step == _global_step:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                print(super_batch_adv_images.shape)
                print(super_batch_adv_labels.shape)
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                print("L: {}".format(L))
                print('*=*=*=*=*')
                print(parameter_dict)
                print('*=*=*=*=*', flush=True)
                logfile.write('*=*=*=*=*\n')
                logfile.write(str(parameter_dict))
                logfile.write('*=*=*=*=*\n')

            _, _, avg_loss_value = sess.run(
                [pretrain_op, train_op, avg_loss],
                feed_dict={
                    x_sb: super_batch_images,
                    y_sb: super_batch_labels,
                    adv_x_sb: super_batch_adv_images,
                    adv_y_sb: super_batch_adv_labels,
                    noise: Noise,
                    adv_noise: Noise_test,
                    FM_h: perturbFM_h
                })
            assert not np.isnan(
                avg_loss_value), 'Model diverged with loss = NaN'

            train_finish_time = time.time()
            train_duration = train_finish_time - adv_finish_time
            train_duration_total += train_duration
            train_duration_count += 1

            # save model every 50 epochs
            if step % (50 * step_for_epoch) == 0 and (step >=
                                                      50 * step_for_epoch):
                print('saving model')
                checkpoint_path = os.path.join(os.getcwd() + dirCheckpoint,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            # report progress every 10 steps
            if step % 10 == 0 and (step > _global_step):
                # print n steps and time
                print("current epoch: {:.2f}".format(step / step_for_epoch))
                num_examples_per_step = L * N_GPUS * 2
                avg_adv_duration = adv_duration_total / adv_duration_count
                avg_train_duration = (train_duration_total /
                                      train_duration_count)
                avg_total_duration = avg_adv_duration + avg_train_duration
                examples_per_sec = num_examples_per_step / avg_total_duration
                sec_per_step = avg_total_duration
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.2f '
                    'sec/step; %.2f sec/adv_gen_op; %.2f sec/train_op)')
                actual_str = format_str % (
                    datetime.now(), step, avg_loss_value, examples_per_sec,
                    sec_per_step, avg_adv_duration, avg_train_duration)
                print(actual_str, flush=True)
                logfile.write(actual_str + '\n')

        # The session is local to this function; release its resources.
        sess.close()
# Reuse the TensorFlow session owned by the Keras backend.
sess = backend.get_session()

# Input placeholders: 784 features per sample, 10 label columns.
x = tf.placeholder(tf.float32, shape=(None, 784))
y = tf.placeholder(tf.float32, shape=(None, 10))

# Baseline accuracy on the unperturbed test set.
# NOTE(review): pred holds class indices, so this presumably expects y_test
# to be integer class ids rather than one-hot rows - confirm.
pred = np.argmax(keras_model.predict(x_test), axis=1)
acc = np.mean(np.equal(pred, y_test))
print("The Test accuracy is: {}".format(acc))

# ------------------------- Adversarial attack (MIM) -------------------------
wrap = KerasModelWrapper(keras_model)
mim = MomentumIterativeMethod(wrap, back='tf', sess=sess)
# Untargeted L-inf attack; each step is as large as the total budget.
mim_params = dict(
    eps=0.7,
    eps_iter=0.7,
    nb_iter=10,
    y_target=None,
    ord=np.inf,
    clip_min=0.,
    clip_max=1.,
)
adv_x = mim.generate_np(x_test, **mim_params)

# Accuracy of the same model on the adversarial examples.
adv_conf = keras_model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_acc = np.mean(np.equal(adv_pred, y_test))
print("The adversarial accuracy is: {}".format(adv_acc))
def whitebox(gan,
             rec_data_path=None,
             batch_size=128,
             learning_rate=0.001,
             nb_epochs=10,
             eps=0.3,
             online_training=False,
             test_on_dev=True,
             attack_type='fgsm',
             defense_type='gan',
             num_tests=-1,
             num_train=-1):
    """Train a target model and evaluate it under a white-box attack.

    Based on the MNIST tutorial from cleverhans.

    Args:
        gan: A `GAN` model. Required when `defense_type` is `defense_gan`.
        rec_data_path: A string to the directory.
        batch_size: The size of the batch.
        learning_rate: The learning rate for training the target models.
        nb_epochs: Number of epochs for training the target model.
        eps: The epsilon of the attack.
        online_training: Training Defense-GAN with online reconstruction. The
            faster but less accurate way is to reconstruct the dataset once
            and use it to train the target models with:
            `python train.py --cfg <path-to-model> --save_recs`
        test_on_dev: Forwarded to `get_cached_gan_data`.
        attack_type: Type of the white-box attack. `None` skips the attack.
            NOTE(review): apart from that `None` check, the attack is
            selected from `FLAGS.attack_type` (`fgsm`, `rand+fgsm`, `cw`,
            `mim`, `deepfool`, `lbfgs`), not from this argument - confirm
            callers keep the two in sync.
        defense_type: String representing the type of defense. Can be `none`,
            `defense_gan`, or `adv_tr`.
            NOTE(review): session selection uses this argument while
            adversarial training and the evaluation path use
            `FLAGS.defense_type`; the default `'gan'` matches none of the
            listed values.
        num_tests: If positive, only the first `num_tests` test samples are
            used.
        num_train: If positive, only the first `num_train` training samples
            are used.

    Returns:
        `(acc, 0, None)` with the clean training accuracy when `attack_type`
        is `None`; otherwise `(acc_adv, 0, roc_info)` where `roc_info` is
        `None` unless `FLAGS.defense_type` is `defense_gan`.

    Raises:
        ValueError: if `FLAGS.attack_type` is not a supported attack.
    """
    FLAGS = tf.flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    if defense_type == 'defense_gan':
        assert gan is not None

    # Create TF session.
    if defense_type == 'defense_gan':
        sess = gan.sess
        if FLAGS.train_on_recs:
            assert rec_data_path is not None or online_training
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

    # First call supplies the training data and the test set used by
    # evaluate(); the second call (orig_data_flag=True) replaces the test set
    # that gets attacked.
    train_images, train_labels, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev)
    rec_test_images = test_images
    rec_test_labels = test_labels

    _, _, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev, orig_data_flag=True)

    x_shape = [None] + list(train_images.shape[1:])
    images_pl = tf.placeholder(tf.float32, shape=x_shape)
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    if num_tests > 0:
        test_images = test_images[:num_tests]
        rec_test_images = rec_test_images[:num_tests]
        test_labels = test_labels[:num_tests]

    if num_train > 0:
        train_images = train_images[:num_train]
        train_labels = train_labels[:num_train]

    # Target model architecture, selected by FLAGS.model.
    models = {
        'A': model_a,
        'B': model_b,
        'C': model_c,
        'D': model_d,
        'E': model_e,
        'F': model_f
    }
    model = models[FLAGS.model](input_shape=x_shape,
                                nb_classes=train_labels.shape[1])

    preds = model.get_probs(images_pl)
    report = AccuracyReport()

    def evaluate():
        # Evaluate the accuracy of the model on legitimate test examples.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         images_pl,
                         labels_pl,
                         preds,
                         rec_test_images,
                         rec_test_labels,
                         args=eval_params,
                         feed={K.learning_phase(): 0})
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    }

    rng = np.random.RandomState([11, 24, 1990])
    tf.set_random_seed(11241990)

    # Adversarial training: add FGSM examples to the training objective.
    preds_adv = None
    if FLAGS.defense_type == 'adv_tr':
        attack_params = {
            'eps': FLAGS.fgsm_eps_tr,
            'clip_min': 0.,
            'clip_max': 1.
        }
        if gan and gan.dataset_name == 'celeba':
            attack_params['clip_min'] = -1.0

        attack_obj = FastGradientMethod(model, sess=sess)
        adv_x_tr = attack_obj.generate(images_pl, **attack_params)
        adv_x_tr = tf.stop_gradient(adv_x_tr)
        preds_adv = model(adv_x_tr)

    model_train(sess,
                images_pl,
                labels_pl,
                preds,
                train_images,
                train_labels,
                args=train_params,
                rng=rng,
                predictions_adv=preds_adv,
                init_all=False,
                feed={K.learning_phase(): 1},
                evaluate=evaluate)

    # Calculate training error.
    eval_params = {'batch_size': batch_size}
    acc = model_eval(
        sess,
        images_pl,
        labels_pl,
        preds,
        train_images,
        train_labels,
        args=eval_params,
        feed={K.learning_phase(): 0},
    )
    print('[#] Accuracy on clean examples {}'.format(acc))
    if attack_type is None:
        return acc, 0, None

    # Put the reconstruction step in front of the model before building the
    # attack graph.
    if FLAGS.defense_type == 'defense_gan':
        z_init_val = None

        if FLAGS.same_init:
            z_init_val = tf.constant(
                np.random.randn(batch_size * gan.rec_rr,
                                gan.latent_dim).astype(np.float32))

        model.add_rec_model(gan, z_init_val, batch_size)

    min_val = 0.0
    if gan and gan.dataset_name == 'celeba':
        min_val = -1.0

    if 'rand' in FLAGS.attack_type:
        # NOTE(review): `args` is not defined in this function; unless a
        # module-level `args` exists this branch raises NameError.
        test_images = np.clip(
            test_images +
            args.alpha * np.sign(np.random.randn(*test_images.shape)),
            min_val, 1.0)
        eps -= args.alpha

    # Initialize the attack object and its parameters.
    if 'fgsm' in FLAGS.attack_type:
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
        attack_obj = FastGradientMethod(model, sess=sess)
    elif FLAGS.attack_type == 'cw':
        attack_obj = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_iterations = 100
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 10.0,
            'batch_size': batch_size,
            'initial_const': 100,
            'feed': {
                K.learning_phase(): 0
            }
        }
    elif FLAGS.attack_type == 'mim':
        attack_obj = MomentumIterativeMethod(model, back='tf', sess=sess)
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
    elif FLAGS.attack_type == 'deepfool':
        attack_obj = DeepFool(model, back='tf', sess=sess)
        attack_params = {
            'eps': eps,
            'clip_min': min_val,
            'clip_max': 1.,
            'nb_candidate': 2,
            'nb_classes': 2
        }
    elif FLAGS.attack_type == 'lbfgs':
        attack_obj = LBFGS(model, back='tf', sess=sess)
        attack_params = {'clip_min': min_val, 'clip_max': 1.}
    else:
        # Previously this fell through and failed later on an unbound
        # `attack_obj`; fail here with an explicit message instead.
        raise ValueError('Unsupported attack type: {}'.format(
            FLAGS.attack_type))

    adv_x = attack_obj.generate(images_pl, **attack_params)

    eval_par = {'batch_size': batch_size}
    if FLAGS.defense_type == 'defense_gan':
        preds_adv = model.get_probs(adv_x)

        # Mean squared perturbation per sample (averaged over all non-batch
        # axes).
        num_dims = len(images_pl.get_shape())
        avg_inds = list(range(1, num_dims))
        diff_op = tf.reduce_mean(tf.square(adv_x - images_pl), axis=avg_inds)
        acc_adv, roc_info = model_eval_gan(
            sess,
            images_pl,
            labels_pl,
            preds_adv,
            None,
            test_images=test_images,
            test_labels=test_labels,
            args=eval_par,
            feed={K.learning_phase(): 0},
            diff_op=diff_op,
        )
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)
    else:
        preds_adv = model(adv_x)
        roc_info = None
        acc_adv = model_eval(sess,
                             images_pl,
                             labels_pl,
                             preds_adv,
                             test_images,
                             test_labels,
                             args=eval_par,
                             feed={K.learning_phase(): 0})
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

    if FLAGS.debug and gan is not None:  # To see some qualitative results.
        # NOTE(review): `images_tensor` and `x_adv_sub` are not defined in
        # this function, and `images_pl_debug` is a slice of a placeholder
        # that is then used as a feed value and passed to save_images_files.
        # This branch looks broken as written; left unchanged because the
        # intended tensors/arrays cannot be determined from here.
        adv_x_debug = adv_x[:batch_size]
        images_pl_debug = images_pl[:batch_size]

        debug_dir = os.path.join('debug', 'whitebox', FLAGS.debug_dir)
        ensure_dir(debug_dir)

        reconstructed_tensors = gan.reconstruct(adv_x_debug,
                                                batch_size=batch_size,
                                                reconstructor_id=2)

        x_rec_orig = gan.reconstruct(images_tensor,
                                     batch_size=batch_size,
                                     reconstructor_id=3)
        x_adv_sub_val = sess.run(x_adv_sub,
                                 feed_dict={
                                     images_tensor: images_pl_debug,
                                     K.learning_phase(): 0
                                 })
        sess.run(tf.local_variables_initializer())
        x_rec_debug_val, x_rec_orig_val = sess.run(
            [reconstructed_tensors, x_rec_orig],
            feed_dict={
                images_tensor: images_pl_debug,
                K.learning_phase(): 0
            })

        save_images_files(x_adv_sub_val, output_dir=debug_dir, postfix='adv')

        postfix = 'gen_rec'
        save_images_files(x_rec_debug_val,
                          output_dir=debug_dir,
                          postfix=postfix)
        save_images_files(images_pl_debug,
                          output_dir=debug_dir,
                          postfix='orig')
        save_images_files(x_rec_orig_val,
                          output_dir=debug_dir,
                          postfix='orig_rec')

    return acc_adv, 0, roc_info
def setUp(self):
    """Create the session, the simple model and the attack under test."""
    super(TestMomentumIterativeMethod, self).setUp()

    session = tf.Session()
    simple_model = SimpleModel()

    self.sess = session
    self.model = simple_model
    # The attack shares the session and model stored on the test case.
    self.attack = MomentumIterativeMethod(simple_model, sess=session)
feed_dict={ x: x_fgsm_iter, y: y_test }) print('\nAccuracy: {:.2f}'.format(acc['fgsm_iter'])) # Momentum iterative FGSM attack if attack_params['momentum']['run'] is True: momentum_params = attack_params['momentum']['params'] batch_size = attack_params['momentum']['batch_size'] print( '\nStarting momentum iterative FGSM with eps = {:.2f}, delta = {:.2f}, niter = {}' .format(momentum_params['eps'], momentum_params['eps_iter'], momentum_params['nb_iter'])) momentum = MomentumIterativeMethod(model, sess=sess) adv_momentum = momentum.generate(x, y=y, **momentum_params) x_momentum = np.zeros(x_test.shape) for i in trange(num_images // batch_size): batch = slice(i * batch_size, (i + 1) * batch_size) x_momentum[batch] = sess.run(adv_momentum, feed_dict={ x: x_test[batch], y: y_test[batch] }) acc['momentum'] = sess.run(accuracy, feed_dict={x: x_momentum, y: y_test}) print('Accuracy: {:.2f}'.format(acc['momentum'])) # PGD attack with multiple random restarts if attack_params['pgd']['run'] is True:
def attack_classifier(sess, x, model, x_test, attack_method="fgsm",
                      target=None, batch_size=128):
    """Generate adversarial examples for ``x_test`` with a CleverHans attack.

    Args:
        sess: TensorFlow session used to build and run the attack.
        x: input placeholder that ``x_test`` batches are fed into.
        model: model object accepted by the CleverHans attack classes.
        x_test: array of inputs; only complete batches are attacked.
        attack_method: one of ``"fgsm"``, ``"basic_iterative"``,
            ``"momentum_iterative"``, ``"saliency"``, ``"virtual"``, ``"cw"``,
            ``"elastic_net"``, ``"deepfool"``, ``"lbfgs"``, ``"madry"``,
            ``"SPSA"``.
        target: ``None`` for an untargeted attack, otherwise the target class
            index in ``[0, 10)``. ``"saliency"``, ``"virtual"`` and
            ``"deepfool"`` require ``None``; ``"lbfgs"`` requires a target.
        batch_size: number of samples per ``sess.run`` call; also the number
            of rows of the one-hot target constant.

    Returns:
        Array with ``(len(x_test) // batch_size) * batch_size`` adversarial
        samples; the trailing ``len(x_test) % batch_size`` inputs are not
        attacked.

    Raises:
        ValueError: if ``attack_method`` is unknown, ``target`` is outside
            ``[0, 10)``, or ``x_test`` holds less than one full batch.
        AssertionError: if ``target`` is (in)compatible with the attack as
            described above.
    """
    nb_classes = 10  # size of the label space assumed by the targets below
    if target is not None and not 0 <= target < nb_classes:
        # An out-of-range index used to produce an empty one-hot slice and
        # therefore an empty y_target constant.
        raise ValueError("target must be in [0, %d), got %r" %
                         (nb_classes, target))

    # How each attack treats `target`: "optional", "forbidden" or "required".
    targeting = "optional"

    # Epsilons are written with float literals (8. / 255) so they do not
    # collapse to 0 under Python 2 integer division.
    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod as attack_cls
        params = {'eps': 8. / 255, 'clip_min': 0., 'clip_max': 1.}
    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod as attack_cls
        params = {
            'eps': 8. / 255,
            'eps_iter': 1. / 255,
            'nb_iter': 10,
            'clip_min': 0.,
            'clip_max': 1.,
            'ord': np.inf
        }
    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod as attack_cls
        params = {
            'eps': 8. / 255,
            'eps_iter': 1. / 255,
            'nb_iter': 10,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod as attack_cls
        targeting = "forbidden"
        params = {
            'theta': 8. / 255,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod as attack_cls
        targeting = "forbidden"
        params = {
            'eps': 8. / 255,
            'num_iterations': 10,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2 as attack_cls
        params = {
            "confidence": 0,
            # must match the fed batch and the y_target constant
            "batch_size": batch_size,
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod as attack_cls
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule": "EN",
            "confidence": 0,
            # must match the fed batch and the y_target constant
            "batch_size": batch_size,
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool as attack_cls
        targeting = "forbidden"
        params = {
            "nb_candidate": 10,
            "overshoot": 1e-3,
            "max_iter": 100,
            "nb_classes": nb_classes,
            "clip_min": 0,
            "clip_max": 1
        }
    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS as attack_cls
        targeting = "required"
        params = {
            # must match the fed batch and the y_target constant
            'batch_size': batch_size,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "initial_const": 1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl as attack_cls
        params = {
            'eps': 8. / 255,
            'eps_iter': 1. / 255,
            'nb_iter': 10,
            'ord': np.inf,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA as attack_cls
        params = {
            'epsilon': 1. / 255,
            'num_steps': 10,
            'is_targeted': False,
            'early_stop_loss_threshold': None,
            'learning_rate': 0.01,
            'delta': 0.01,
            # NOTE(review): left at the original 128 - in SPSA this looks
            # like the number of perturbation samples rather than the input
            # batch size; confirm against the installed CleverHans version.
            'batch_size': 128,
            'spsa_iters': 1,
            'is_debug': False
        }
    else:
        raise ValueError("Can not recognize this attack method: %s" %
                         attack_method)

    if targeting == "forbidden":
        assert target is None, \
            "%s does not accept a target class" % attack_method
    elif targeting == "required":
        assert target is not None, \
            "%s requires a target class" % attack_method

    if target is not None:
        # batch_size identical one-hot rows selecting the target class
        one_hot_row = np.eye(nb_classes)[target:target + 1]
        params["y_target"] = tf.constant(
            np.repeat(one_hot_row, batch_size, axis=0))
        if attack_method == "SPSA":
            params["is_targeted"] = True

    method = attack_cls(model, sess=sess)
    adv_x = method.generate(x, **params)

    num_batch = x_test.shape[0] // batch_size
    if num_batch == 0:
        # np.concatenate would raise an opaque ValueError on an empty list.
        raise ValueError("x_test has %d samples, fewer than one batch of %d" %
                         (x_test.shape[0], batch_size))

    adv_imgs = [
        sess.run(adv_x,
                 feed_dict={x: x_test[i * batch_size:(i + 1) * batch_size]})
        for i in range(num_batch)
    ]
    return np.concatenate(adv_imgs, axis=0)
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1,
                   adversarial_training=ADVERSARIAL_TRAINING,
                   attacking=ATTACKING, origin_method=ORIGIN_METHOD,
                   save_model=SAVE_MODEL, model_type=MODEL_TYPE):
    """
    MNIST CleverHans tutorial: adversarially train a Keras model with one
    attack (`origin_method`) and evaluate it against fgsm, bim and mifgsm.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model; the model type and
                      `origin_method` are appended as sub-directories
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: unused in this function
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param adversarial_training: if true, evaluate the model against the
                                 fgsm, bim and mifgsm attacks at the end
    :param attacking: unused; the final evaluation always runs all three
                      attacks
    :param origin_method: attack used for adversarial training, one of
                          'fgsm', 'bim', 'mifgsm'
    :param save_model: if true, save a TF checkpoint and a Keras .h5 file
    :param model_type: 'a', 'b' or 'c', selecting modelA / modelB / modelC
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    if model_type == 'a':
        the_model = modelA
    elif model_type == 'b':
        the_model = modelB
    elif model_type == 'c':
        the_model = modelC
    else:
        exit('the model type must be a or b or c.')

    model = the_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    wrap = KerasModelWrapper(model)
    preds = model(x)

    def build_attack(name):
        """Return (attack object, generate() kwargs) for attack `name`."""
        if name == 'fgsm':
            method = FastGradientMethod(wrap, sess=sess)
            params = {'eps': 0.2, 'clip_min': 0., 'clip_max': 1.}
        elif name == 'bim':
            method = BasicIterativeMethod(wrap, sess=sess)
            params = {'eps': 0.2, 'eps_iter': 0.06, 'nb_iter': 10,
                      'clip_min': 0., 'clip_max': 1.}
        elif name == 'mifgsm':
            method = MomentumIterativeMethod(wrap, sess=sess)
            params = {'eps': 0.2, 'eps_iter': 0.08, 'nb_iter': 10,
                      'decay_factor': 0.4, 'clip_min': 0., 'clip_max': 1.}
        else:
            exit("the attack method must be fgsm,bim,mifgsm")
        return method, params

    # Attack used for adversarial training
    att_method, att_method_params = build_attack(origin_method)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    print(att_method_params)
    adv_x = att_method.generate(x, **att_method_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    def attack(x):
        # Always uses the training attack; the evaluation loop below keeps
        # its own variables so this closure is never rebound.
        return att_method.generate(x, **att_method_params)

    def evaluate2():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        print('AT Test accuracy on legitimate examples: %0.4f' % acc)

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, x_test, y_test,
                              args=eval_params)
        print('AT Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Train an MNIST model.
    # NOTE: 'train_dir' here is the caller-supplied directory; the
    # per-model sub-directory is only appended afterwards.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    train_dir = train_dir + '/' + model_type + '/' + origin_method
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate2()
    else:
        print("Model was not loaded, training from scratch.")
        loss2 = CrossEntropy(wrap, smoothing=label_smoothing, attack=attack)
        train(sess, loss2, x_train, y_train, evaluate=evaluate2,
              args=train_params, rng=rng)

    if save_model:
        saver = tf.train.Saver(max_to_keep=1)
        # Tag the checkpoint with the epochs actually requested by the
        # caller, not the module-level default.
        saver.save(sess, '{}/{}.ckpt'.format(train_dir, origin_method),
                   global_step=nb_epochs)
        keras.models.save_model(
            model, '{}/{}_mnist.h5'.format(train_dir, origin_method))
        print("model has been saved")

    # >>> other method >>>
    if adversarial_training:
        for attack_name in ['fgsm', 'bim', 'mifgsm']:
            eval_method, eval_method_params = build_attack(attack_name)

            # Evaluate the accuracy of the MNIST model on adversarial examples
            print(eval_method_params)
            eval_adv_x = eval_method.generate(x, **eval_method_params)
            # Consider the attack to be constant
            eval_adv_x = tf.stop_gradient(eval_adv_x)
            eval_preds_adv = model(eval_adv_x)

            eval_par = {'batch_size': batch_size}
            start_time = time.time()
            acc = model_eval(sess, x, y, eval_preds_adv, x_test, y_test,
                             args=eval_par)
            print('Test accuracy on adversarial examples: %0.4f' % acc)
            end_time = time.time()
            print("{} attack time is {}\n".format(attack_name,
                                                  end_time - start_time))
            # Overwritten on each iteration: holds the last attack's accuracy.
            report.clean_train_adv_eval = acc

    gc.collect()
    return report
def main(args):
    """Attack a pretrained ResNet-50 with a CleverHans attack and save the
    successful adversarial examples.

    Reads from ``args``: ``image_folder``, ``batch_size``, ``device``,
    ``eps`` (divided by 255 before use), ``ord`` (negative means L-inf;
    the attribute is overwritten with the resolved value), ``attack``
    (one of 'fgsm', 'iter', 'm-iter', 'pgd', 'jsma', 'deepfool', 'cw',
    'lbfgs') and ``out_folder``.

    For every image whose predicted class changes under attack, one
    compressed ``.npz`` file holding the adversarial image under key
    ``img`` is written to ``args.out_folder``.

    :raises ValueError: if ``args.attack`` is not a supported attack name
    """
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])

    dataset = ImageDataset(args.image_folder, transform=transform,
                           return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset, shuffle=False,
                            batch_size=args.batch_size, pin_memory=True,
                            num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224,))

    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # compute clip_min and clip_max using a full black and a full white image
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord
    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    # Extra placeholders that must be fed on every sess.run; only the
    # targeted LBFGS attack needs one. Feeding them unconditionally (as the
    # previous code did) raised NameError for every other attack.
    extra_feed = {}

    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False,
                         **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        # One random one-hot target class out of 1000.
        # NOTE(review): the target has a batch dimension of 1 — confirm it
        # is compatible with args.batch_size > 1.
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}
        extra_feed[y] = target
    else:
        raise ValueError('Unknown attack: {}'.format(args.attack))

    attack_name = args.attack + attack_name

    print('Running [{}]. Params: {}'.format(args.attack.upper(),
                                            attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:
        progress.set_description('ATTACK')

        feed = dict(extra_feed)
        feed[x_op] = x
        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict=feed)

        # An attack succeeds when the predicted class changes.
        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]

        progress.set_postfix(
            {'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')

        for p, a, s, d in zip(success_paths, success_adv_x, success_src,
                              success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
# --- DeepFool attack -------------------------------------------------------
# Define adv attack
deepfool = DeepFool(wrap, sess=sess)
# NOTE(review): 'eps' is passed to DeepFool.generate here — confirm the
# installed CleverHans version accepts (or ignores) it for this attack.
deepfool_params = {'eps': args.noise_eps, 'clip_min': 0., 'clip_max': 1.}
# Attack images
x_deepfool = deepfool.generate(x[0], **deepfool_params)
# Consider the attack to be constant
x_deepfool = tf.stop_gradient(x_deepfool)
# Evaluate predictions on adv attacks
preds_deepfool = model(x_deepfool)
acc_deepfool, acc_op_deepfool = tf.metrics.accuracy(
    labels=tf.argmax(y, 1), predictions=tf.argmax(preds_deepfool, 1))

# --- Momentum iterative attack ---------------------------------------------
# Define adv attack
momentum_iterative = MomentumIterativeMethod(wrap, sess=sess)
momentum_iterative_params = {
    'eps': args.noise_eps,
    'clip_min': 0.,
    'clip_max': 1.
}
# Attack images. Use this attack's own parameter dict (the previous code
# passed deepfool_params, so edits to momentum_iterative_params were ignored).
x_momentum_iterative = momentum_iterative.generate(
    x[0], **momentum_iterative_params)
# Consider the attack to be constant
x_momentum_iterative = tf.stop_gradient(x_momentum_iterative)
# Evaluate predictions on adv attacks
preds_momentum_iterative = model(x_momentum_iterative)
acc_momentum_iterative, acc_op_momentum_iterative = tf.metrics.accuracy(
    labels=tf.argmax(y, 1),
    predictions=tf.argmax(preds_momentum_iterative, 1))
def test():
    """Build the RDPCNN graph, attach four CleverHans attacks to it, restore
    the trained weights and run the test / robustness reports.

    All configuration is read from ``FLAGS``; nothing is returned.
    """
    tf.reset_default_graph()
    graph = tf.get_default_graph()

    with graph.as_default():
        # Placeholder nodes.
        image_shape = [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS,
                       FLAGS.NUM_CHANNELS]
        images_holder = tf.placeholder(tf.float32, image_shape)
        label_holder = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        is_training = tf.placeholder(tf.bool, ())

        # model
        model = model_cifar100.RDPCNN(images_holder, label_holder,
                                      FLAGS.INPUT_SIGMA, is_training)
        # for adv examples; the results are not used below, the calls are
        # kept in case they add ops to the graph
        model_loss = model.loss()
        model_acc = model.cnn_accuracy

        # robust
        def inference(x):
            logits, _probs = model.cnn.prediction(x)
            return logits

        def inference_prob(x):
            _logits, probs = model.cnn.prediction(x)
            return probs

        graph_dict = {
            "images_holder": images_holder,
            "label_holder": label_holder,
            "is_training": is_training,
        }

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config, graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            # load model
            model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

            # adv test
            ##################################################################
            x_advs = {}
            ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                                   output_layer='logits')
            ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                                  output_layer='probs')

            attack_size = FLAGS.ATTACK_SIZE
            clip_kwargs = {"clip_min": 0.0, "clip_max": 1.0}
            # Shared settings of the three iterative attacks: ten steps of
            # one tenth of the budget each.
            iterative_kwargs = dict(eps=attack_size,
                                    eps_iter=attack_size / 10,
                                    nb_iter=10,
                                    **clip_kwargs)

            # FastGradientMethod
            fgsm_attack = FastGradientMethod(model=ch_model_probs, sess=sess)
            x_advs["fgsm"] = fgsm_attack.generate(x=images_holder,
                                                  eps=attack_size,
                                                  **clip_kwargs)

            # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod
            # with no random init)
            # default: eps_iter=0.05, nb_iter=10
            ifgsm_attack = BasicIterativeMethod(model=ch_model_probs,
                                                sess=sess)
            x_advs["ifgsm"] = ifgsm_attack.generate(x=images_holder,
                                                    **iterative_kwargs)

            # MomentumIterativeMethod
            # default: eps_iter=0.06, nb_iter=10
            mim_attack = MomentumIterativeMethod(model=ch_model_probs,
                                                 sess=sess)
            x_advs["mim"] = mim_attack.generate(x=images_holder,
                                                decay_factor=1.0,
                                                **iterative_kwargs)

            # MadryEtAl (Projected Gradient with random init, same as
            # rand+fgsm)
            # default: eps_iter=0.01, nb_iter=40
            madry_attack = MadryEtAl(model=ch_model_probs, sess=sess)
            x_advs["madry"] = madry_attack.generate(x=images_holder,
                                                    **iterative_kwargs)

            graph_dict["x_advs"] = x_advs
            ##################################################################

            # tensorboard writer
            #test_writer = model_utils.init_writer(FLAGS.TEST_LOG_PATH, g)
            print("\nTest")
            # A local run only processes two batches.
            total_test_batch = 2 if FLAGS.local else None

            dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
            test_info(sess, model, True, graph_dict, dp_info,
                      FLAGS.TEST_LOG_FILENAME, total_batch=total_test_batch)
            robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
def SSGD_resnet_testing(TIN_data, resnet_params, train_params, test_params,
                        all_params):
    """Restore a trained checkpoint and log its certified-robustness results
    under the enabled CleverHans attacks.

    For every value in ``test_params.fgsm_eps_list`` and every entry of
    ``test_params.attacks`` plus ``'benign'``, the model is queried
    ``test_params.num_samples`` times with freshly drawn noise; the argmax
    votes are counted, and accuracy, robust accuracy and robust utility are
    averaged over ``test_params.test_epochs`` batches and appended to the
    log file.

    :param TIN_data: dataset object; ``TIN_data.test.next_batch(n)`` must
                     return ``(images, one_hot_labels)``
    :param resnet_params: passed through to ``test_inference`` and the
                          attack model wrapper
    :param train_params: model/training settings (kernel sizes, image size,
                         number of classes, ``dp_epsilon``, ...)
    :param test_params: testing settings (checkpoint directory and step,
                        attacks, eps list, batch size, log file path, ...)
    :param all_params: dict holding ``dp_mult``, ``sigmaEGM``, ``__noiseE``,
                       ``grad_redis``, ``sigmaHGM``, ``__noiseH`` and the
                       other values read below
    :raises ValueError: if no checkpoint state is found, or no checkpoint
                        matches ``test_params.step_to_load``
    """
    # dict for encoding layer variables and output layer variables
    pre_define_vars = {}

    # list of variables to train
    train_vars = []

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Created although unused below. NOTE(review): presumably needed so
        # the saver's variable list matches the checkpoint — confirm.
        global_step = tf.Variable(0, trainable=False)

        # Parameters Declarification
        ######################################

        # encoding (pretrain) layer variables
        with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
            kernel1 = tf.get_variable(
                'kernel1',
                shape=[
                    train_params.enc_kernel_size,
                    train_params.enc_kernel_size, 3,
                    train_params.enc_filters
                ],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            biases1 = tf.get_variable(
                'biases1',
                shape=[train_params.enc_filters],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        pre_define_vars['kernel1'] = kernel1
        pre_define_vars['biases1'] = biases1
        train_vars.append(kernel1)
        train_vars.append(biases1)

        dp_mult = all_params['dp_mult']

        # output layer variables
        with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope:
            stdv = 1.0 / math.sqrt(train_params.hk)
            final_w = tf.get_variable(
                'kernel',
                shape=[train_params.hk, train_params.num_classes],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-stdv, stdv))
            final_b = tf.get_variable(
                'bias',
                shape=[train_params.num_classes],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        pre_define_vars['final_w'] = final_w
        pre_define_vars['final_b'] = final_b
        train_vars.append(final_w)
        train_vars.append(final_b)
        ######################################

        # Build a Graph that computes the logits predictions from the inputs
        ######################################
        # input placeholders
        x_sb = tf.placeholder(
            tf.float32,
            [None, train_params.image_size, train_params.image_size, 3],
            name='x_sb')  # input is the bunch of n_batchs
        x_test = tf.placeholder(
            tf.float32,
            [None, train_params.image_size, train_params.image_size, 3],
            name='x_test')

        y_sb = tf.placeholder(
            tf.float32, [None, train_params.num_classes],
            name='y_sb')  # input is the bunch of n_batchs (super batch)
        y_test = tf.placeholder(tf.float32, [None, train_params.num_classes],
                                name='y_test')

        noise = tf.placeholder(tf.float32, [
            None, train_params.enc_h_size, train_params.enc_h_size,
            train_params.enc_filters
        ], name='noise')  # one time

        keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

        with tf.device('/gpu:0'):
            # the model for testing
            y_logits_test, _ = test_inference(
                x_sb, train_params.attack_norm_bound * noise, keep_prob,
                pre_define_vars, resnet_params, train_params)
            y_softmax_test = tf.nn.softmax(y_logits_test)
        correct_prediction = tf.equal(tf.argmax(y_logits_test, 1),
                                      tf.argmax(y_sb, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # print all variables
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        all_vars = tf.global_variables()
        print_var_list('all vars', all_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        # add selected vars into list
        # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or
        for var in tf.global_variables():
            if 'resnet_model' in var.name and \
              ('conv0' in var.name or
              'fc' in var.name or
              'res3' in var.name or
              'res4' in var.name or
              'res1' in var.name or
              'res2' in var.name) and \
                ('gamma' in var.name or
                  'beta' in var.name or
                  'kernel' in var.name or
                  'bias' in var.name):
                if var not in train_vars:
                    train_vars.append(var)
            elif 'enc_layer' in var.name and \
              ('kernel' in var.name or
                'bias' in var.name or
                'gamma' in var.name or
                'beta' in var.name):
                if var not in train_vars:
                    train_vars.append(var)

        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        print_var_list('train_vars', train_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        ######################################

        # Create a saver. tf.global_variables() replaces the deprecated
        # tf.all_variables() and matches the calls above.
        saver = tf.train.Saver(var_list=tf.global_variables(),
                               max_to_keep=1000)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # list all checkpoints in ckpt_path
        checkpoint_path_read = os.path.join(os.getcwd() +
                                            test_params.check_point_dir)
        ckpts = tf.train.get_checkpoint_state(checkpoint_path_read)
        print(ckpts)
        if ckpts is None:
            raise ValueError('no checkpoint state found in {}'.format(
                checkpoint_path_read))

        # find the ckpt we need to load and load it
        restored = False
        for ckpt in ckpts.all_model_checkpoint_paths:
            # print(ckpt)
            ckpt_step = int(ckpt.split('-')[-1])
            if ckpt_step == test_params.step_to_load:
                saver.restore(sess, ckpt)
                print('model loaded from {}'.format(ckpt))
                restored = True
                break
        if not restored:
            # No initializer is ever run in this function, so continuing
            # without a restore would only fail later on the first sess.run.
            raise ValueError('no checkpoint for step {} in {}'.format(
                test_params.step_to_load, checkpoint_path_read))

        # #######################################

        # # setup all attacks
        attack_switch = {
            'fgsm': False,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_output_probs,
            output_layer='probs',
            keep_prob=keep_prob,
            pre_define_vars=pre_define_vars,
            resnet_params=resnet_params,
            train_params=train_params)
        attack_tensor_testing_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        with tf.device('/gpu:0'):
            if attack_switch['ifgsm']:
                print('creating attack tensor of BasicIterativeMethod')
                ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                 sess=sess)
                attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    clip_min=-1.0,
                    clip_max=1.0)

        # MomentumIterativeMethod
        with tf.device('/gpu:0'):
            if attack_switch['mim']:
                print('creating attack tensor of MomentumIterativeMethod')
                mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                  sess=sess)
                attack_tensor_testing_dict['mim'] = mim_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    decay_factor=1.0,
                    clip_min=-1.0,
                    clip_max=1.0)

        # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
        with tf.device('/gpu:0'):
            if attack_switch['madry']:
                print('creating attack tensor of MadryEtAl')
                madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                attack_tensor_testing_dict['madry'] = madry_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    clip_min=-1.0,
                    clip_max=1.0)

        # #######################################

        sigmaEGM = all_params['sigmaEGM']
        __noiseE = all_params['__noiseE']
        grad_redis = all_params['grad_redis']
        _sensitivity_2 = all_params['_sensitivity_2']
        _sensitivityW = all_params['_sensitivityW']
        Delta_redis = all_params['Delta_redis']
        sigmaHGM = all_params['sigmaHGM']
        __noiseH = all_params['__noiseH']
        __noise_zero = all_params['__noise_zero']

        ####################################

        ####################################
        print('start testing')
        start_time = time.time()
        log_file_path = os.getcwd() + test_params.log_file_path
        attacks_and_benign = test_params.attacks + ['benign']

        # The context manager guarantees the log is closed even if a
        # sess.run below raises.
        with open(log_file_path, 'a', encoding='utf-8') as log_file:
            #===================adv samples=====================
            # for each eps setting
            for fgsm_eps in test_params.fgsm_eps_list:
                adv_acc_dict = {}
                robust_adv_acc_dict = {}
                robust_adv_utility_dict = {}
                log_str = ''
                eps_start_time = time.time()
                # cover all test data
                for i in range(test_params.test_epochs):
                    test_batch = TIN_data.test.next_batch(
                        test_params.test_batch_size)
                    adv_images_dict = {}
                    # test for each attack
                    for atk in attacks_and_benign:
                        start_time = time.time()
                        if atk not in adv_acc_dict:
                            adv_acc_dict[atk] = 0.0
                            robust_adv_acc_dict[atk] = 0.0
                            robust_adv_utility_dict[atk] = 0.0
                        if atk == 'benign':
                            testing_img = test_batch[0]
                        elif attack_switch[atk]:
                            # if only one gpu available, generate adv samples in-place
                            if atk not in adv_images_dict:
                                adv_images_dict[atk] = sess.run(
                                    attack_tensor_testing_dict[atk],
                                    feed_dict={
                                        x_sb: test_batch[0],
                                        mu_alpha: [fgsm_eps],
                                        keep_prob: 1.0
                                    })
                            testing_img = adv_images_dict[atk]
                        else:
                            continue
                        print('adv gen time: {}s'.format(time.time() -
                                                         start_time))
                        start_time = time.time()

                        ### PixelDP Robustness ###
                        # Per-sample vote counts over the noisy draws.
                        predictions_form_argmax = np.zeros([
                            test_params.test_batch_size,
                            train_params.num_classes
                        ])
                        softmax_predictions = sess.run(
                            y_softmax_test,
                            feed_dict={
                                x_sb: testing_img,
                                noise: (__noiseE + __noiseH) / 2,
                                keep_prob: 1.0
                            })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
                        for n_draws in range(1, test_params.num_samples + 1):
                            if n_draws % 100 == 0:
                                print(
                                    'current draws: {}, avg draw time: {}s'.format(
                                        n_draws,
                                        (time.time() - start_time) / n_draws))
                            # NOTE(review): np.random.normal takes a standard
                            # deviation as its second argument; sigma**2 is
                            # passed here — confirm this is intended.
                            _noiseE = np.random.normal(
                                0.0, sigmaEGM**2,
                                train_params.enc_h_size *
                                train_params.enc_h_size *
                                train_params.enc_filters).astype(np.float32)
                            _noiseE = np.reshape(_noiseE, [
                                -1, train_params.enc_h_size,
                                train_params.enc_h_size,
                                train_params.enc_filters
                            ])
                            _noise = np.random.normal(
                                0.0, sigmaHGM**2,
                                train_params.enc_h_size *
                                train_params.enc_h_size *
                                train_params.enc_filters).astype(np.float32)
                            _noise = np.reshape(_noise, [
                                -1, train_params.enc_h_size,
                                train_params.enc_h_size,
                                train_params.enc_filters
                            ]) * grad_redis
                            # Count the votes of the previous draw, then
                            # query the model again with fresh noise.
                            for j in range(test_params.test_batch_size):
                                pred = argmax_predictions[j]
                                predictions_form_argmax[j, pred] += 1
                            softmax_predictions = sess.run(
                                y_softmax_test,
                                feed_dict={
                                    x_sb: testing_img,
                                    noise: (__noiseE + __noiseH) / 2 +
                                           (_noiseE + _noise) / 4,
                                    keep_prob: 1.0
                                })
                            argmax_predictions = np.argmax(
                                softmax_predictions, axis=1)
                        final_predictions = predictions_form_argmax
                        is_correct = []
                        is_robust = []
                        for j in range(test_params.test_batch_size):
                            is_correct.append(
                                np.argmax(test_batch[1][j]) == np.argmax(
                                    final_predictions[j]))
                            robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                                counts=predictions_form_argmax[j],
                                eta=0.05,
                                dp_attack_size=fgsm_eps,
                                dp_epsilon=train_params.dp_epsilon,
                                dp_delta=0.05,
                                dp_mechanism='gaussian') / dp_mult
                            is_robust.append(
                                robustness_from_argmax >= fgsm_eps)
                        adv_acc_dict[atk] += np.sum(
                            is_correct) * 1.0 / test_params.test_batch_size
                        # NOTE(review): when no sample in the batch is
                        # robust this is 0/0 and the accumulated value
                        # becomes NaN for this eps — confirm whether such
                        # batches should be skipped instead.
                        robust_adv_acc_dict[atk] += np.sum([
                            a and b for a, b in zip(is_robust, is_correct)
                        ]) * 1.0 / np.sum(is_robust)
                        robust_adv_utility_dict[atk] += np.sum(
                            is_robust) * 1.0 / test_params.test_batch_size

                        dt = time.time() - start_time
                        print('atk test time: {}s'.format(dt), flush=True)
                ##############################
                # average all acc for whole test data
                log_str += datetime.now().strftime("%Y-%m-%d_%H:%M:%S\n")
                log_str += 'model trained epoch: {}\n'.format(
                    test_params.epoch_to_test)
                log_str += 'fgsm_eps: {}\n'.format(fgsm_eps)
                log_str += 'iter_step_testing: {}\n'.format(
                    test_params.iter_step_testing)
                log_str += 'num_samples: {}\n'.format(test_params.num_samples)
                for atk in attacks_and_benign:
                    adv_acc_dict[atk] = adv_acc_dict[
                        atk] / test_params.test_epochs
                    robust_adv_acc_dict[atk] = robust_adv_acc_dict[
                        atk] / test_params.test_epochs
                    robust_adv_utility_dict[atk] = robust_adv_utility_dict[
                        atk] / test_params.test_epochs
                    # added robust prediction
                    log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
                        atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                        robust_adv_utility_dict[atk],
                        robust_adv_acc_dict[atk] *
                        robust_adv_utility_dict[atk])
                dt = time.time() - eps_start_time
                print('total test time: {}s'.format(dt), flush=True)
                print(log_str, flush=True)
                print('*******************')

                log_file.write(log_str)
                log_file.write('*******************\n')
                log_file.flush()
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile): logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" % (fgsm_eps, learning_rate, alpha, total_eps)) """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) eps_benign = 1 / (1 + eps2_ratio) * (epsilon2) eps_adv = eps2_ratio / (1 + eps2_ratio) * (epsilon2) # Parameters Declarification #with tf.variable_scope('conv1') as scope: kernel1 = _variable_with_weight_decay( 'kernel1', shape=[4, 4, 3, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[AECODER_VARIABLES]) biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0), collect=[AECODER_VARIABLES]) shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2 * Delta2 / (L * sensitivity ) #2*3*(14*14 + 2)*16/(L*sensitivity) #with tf.variable_scope('conv2') as scope: kernel2 = _variable_with_weight_decay( 'kernel2', shape=[5, 5, 128, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #with tf.variable_scope('conv3') as scope: kernel3 = _variable_with_weight_decay( 'kernel3', shape=[5, 5, 256, 256], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #with tf.variable_scope('local4') as scope: kernel4 = _variable_with_weight_decay( 'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004, collect=[CONV_VARIABLES]) biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #with tf.variable_scope('local5') as 
scope: kernel5 = _variable_with_weight_decay( 'kernel5', [hk, 10], stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #scale2 = tf.Variable(tf.ones([hk])) #beta2 = tf.Variable(tf.zeros([hk])) params = [ kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5 ] ######## # Build a Graph that computes the logits predictions from the # inference model. FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128]) noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) # Auto-Encoder # Enc_Layer2 = EncLayer(inpt=adv_x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h) Enc_Layer3 = EncLayer(inpt=x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2( xShape=tf.shape(x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h) cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0) ### x_image = x + noise y_conv = inference(x_image, FM_h, params) softmax_y_conv = tf.nn.softmax(y_conv) y_ = tf.placeholder(tf.float32, [None, 10]) adv_x += adv_noise y_adv_conv = inference(adv_x, FM_h, params) adv_y_ = tf.placeholder(tf.float32, [None, 10]) # Calculate loss. 
Apply Taylor Expansion for the output layer perturbW = perturbFM * params[8] loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha, perturbW) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]); pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) #print(pretrain_var_list) #print(train_var_list) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize( pretrain_adv + pretrain_benign, global_step=global_step, var_list=pretrain_var_list) train_op = cifar10.train(loss, global_step, learning_rate, _var_list=train_var_list) sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) sess.run(kernel1.initializer) dp_epsilon = 1.0 _gamma = sess.run(gamma) _gamma_x = Delta2 / L epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x) print(epsilon2_update / _gamma + epsilon2_update / _gamma_x) print(epsilon2_update) delta_r = fgsm_eps * (image_size**2) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW * (14**2) #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon) dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + ( 2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon) dynamic_eps = tf.placeholder(tf.float32) """y_test = inference(x, FM_h, params) softmax_y = tf.nn.softmax(y_test); c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0) x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])""" attack_switch = { 'fgsm': True, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 
'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, image_size=image_size, adv_noise=adv_noise) # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]) attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = 
MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry #====================== attack ========================= #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build an initialization operation to run below. init = tf.initialize_all_variables() sess.run(init) # Start the queue runners. #tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint, sess.graph) # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') T = int(int(math.ceil(D / L)) * epochs + 1) # number of steps step_for_epoch = int(math.ceil(D / L)) #number of steps for one epoch perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128) perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128]) #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32) #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3]) perturbFM_h = np.random.laplace(0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128]) #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32) #_W_adv = np.reshape(_W_adv, [32, 32, 3]) #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128) #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]); test_size = len(cifar10_data.test.images) #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt') 
#BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L) #generateNoise(image_size, Delta2, eps_benign, L, beta); #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L) Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L) #generateNoise(image_size, Delta2, eps_adv, L, beta); Noise_test = generateIdLMNoise( image_size, 0, epsilon2_update, L) #generateNoise(image_size, 0, 2*epsilon2, test_size, beta); emsemble_L = int(L / 3) preT_epochs = 100 pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1) """logfile.write("pretrain: \n") for step in range(_global_step, _global_step + pre_T): d_eps = random.random()*0.5; batch = cifar10_data.train.next_batch(L); #Get a random batch. adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test}) for iter in range(0, 2): adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test}) #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h}); batch = cifar10_data.train.next_batch(L); sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h}); if step % int(25*step_for_epoch) == 0: cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128) logfile.write("step \t %d \t %g \n"%(step, cost_value)) print(cost_value) print('pre_train finished')""" _global_step = 0 for step in xrange(_global_step, _global_step + T): start_time = time.time() d_eps = random.random() * 0.5 batch = cifar10_data.train.next_batch(emsemble_L) #Get a random batch. 
y_adv_batch = batch[1] """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test}) for iter in range(0, 2): adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})""" adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) batch = cifar10_data.train.next_batch(emsemble_L) y_adv_batch = np.append(y_adv_batch, batch[1], axis=0) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) batch = cifar10_data.train.next_batch(emsemble_L) y_adv_batch = np.append(y_adv_batch, batch[1], axis=0) adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) adv_images = np.append(np.append(adv_images_ifgsm, adv_images_mim, axis=0), adv_images_madry, axis=0) batch = cifar10_data.train.next_batch(L) #Get a random batch. sess.run(pretrain_step, feed_dict={ x: batch[0], adv_x: adv_images, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h }) _, loss_value = sess.run( [train_op, loss], feed_dict={ x: batch[0], y_: batch[1], adv_x: adv_images, adv_y_: y_adv_batch, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h }) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' # report the result periodically if step % (50 * step_for_epoch) == 0 and step >= (300 * step_for_epoch): '''predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test}) argmax_predictions = np.argmax(softmax_predictions, axis=1) """for n_draws in range(0, 2000): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L) _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);""" for j in 
range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 2000; """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1)""" final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) acc = np.sum(is_correct)*1.0/test_size robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_utility = np.sum(is_robust)*1.0/test_size log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)''' #===================adv samples===================== log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format( step, total_eps) """adv_images_dict = {} for atk in attack_switch.keys(): if attack_switch[atk]: adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict ={x:cifar10_data.test.images}) print("Done with the generating of Adversarial samples")""" #===================adv samples===================== adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} test_bach_size = 5000 for atk in attack_switch.keys(): print(atk) if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: test_bach = cifar10_data.test.next_batch( test_bach_size) adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={ x: test_bach[0], adv_noise: Noise_test, mu_alpha: [fgsm_eps] }) print("Done adversarial examples") ### 
PixelDP Robustness ### predictions_form_argmax = np.zeros( [test_bach_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={ x: adv_images_dict, noise: Noise, FM_h: perturbFM_h }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1000): _BenignLNoise = generateIdLMNoise( image_size, Delta2, epsilon2_update, L) _perturbFM_h = np.random.laplace( 0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]) if n_draws == 500: print("n_draws = 500") for j in range(test_bach_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (_BenignLNoise / 10 + Noise), FM_h: perturbFM_h }) * sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h / 10 + perturbFM_h) }) #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_bach_size): is_correct.append( np.argmax(test_bach[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append( robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum( is_correct) * 1.0 / test_bach_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum( is_robust) * 1.0 / test_bach_size ############################## for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str 
+= " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') # Save the model checkpoint periodically. if step % (10 * step_for_epoch) == 0 and (step > _global_step): num_examples_per_step = L examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN, testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1,
                     adversarial_training=ADVERSARIAL_TRAINING):
    """
    CIFAR10 cleverhans tutorial using the Momentum Iterative Method attack.

    Optionally trains a clean model, evaluates it against a sweep of
    increasingly strong momentum-iterative attacks, and then optionally
    trains and evaluates a second, adversarially trained model.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: passed to ModelAllConvolutional as its filter count
    :param num_threads: if truthy, the session is created with
                        intra_op_parallelism_threads=1 (the value itself is
                        not used); otherwise with allow_soft_placement=True
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param adversarial_training: True means using adversarial training
    :return: an AccuracyReport object
    """
    # Number of attack strengths evaluated against the clean model and the
    # amount by which eps_iter grows between two consecutive evaluations.
    nb_sweep_steps = 20
    eps_iter_increment = 0.002

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        # NOTE(review): num_threads only acts as a switch here; the thread
        # count is fixed to 1 regardless of its value -- confirm intent.
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        # put data on cpu and gpu both
        config_args = dict(allow_soft_placement=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack configuration
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    mifgsm_params = {
        'eps': 0.5,
        'clip_min': 0.,
        'eps_iter': 0.002,
        'nb_iter': 10,
        'clip_max': 1.,
        'ord': np.inf
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds` on a data set and store the accuracy in `report`.

        When `is_adv` is not None the accuracy is also printed, labelled as
        'adversarial' or 'legitimate'.
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                      input_shape=[32, 32, 3])
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        # `evaluate` is handed to train() so that evaluation can happen
        # while training.
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())

        # save model
        model_path = sys.path[0] + '/simple_cifar10.ckpt'
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess, model_path)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Momentum Iterative Method attack object and graph
        mifgsm = MomentumIterativeMethod(model, sess=sess)

        # Sweep over a private copy of the attack parameters: incrementing
        # the shared dict in place would change the attack later used for
        # adversarial training, making it depend on whether this clean
        # training branch ran.
        sweep_params = dict(mifgsm_params)
        for _ in range(nb_sweep_steps):
            adv_x = mifgsm.generate(x, **sweep_params)
            preds_adv = model.get_logits(adv_x)
            print("eps:%0.2f" %
                  (sweep_params["eps_iter"] * sweep_params['nb_iter']))

            # Evaluate the accuracy of the clean model on adversarial
            # examples; the report key holds the most recent sweep step.
            do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            sweep_params['eps_iter'] = (
                sweep_params['eps_iter'] + eps_iter_increment)

        # Calculate training error (for the last attack of the sweep)
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    if not adversarial_training:
        return report

    # Only announce the second phase when it actually follows a first one.
    if clean_train:
        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to the
    # MomentumIterativeMethod attack
    model2 = ModelAllConvolutional('model2', nb_classes, nb_filters,
                                   input_shape=[32, 32, 3])
    bim2 = MomentumIterativeMethod(model2, sess=sess)

    def attack(x):
        return bim2.generate(x, **mifgsm_params)

    # add attack to loss
    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # Block gradients from flowing through the construction of the
        # adversarial examples. Backpropagating through the attack
        # increases the cost of training, but gives the defender the
        # ability to anticipate how the attacker will change their
        # strategy in response to updates to the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, None, None,
          dataset_train=dataset_train, dataset_size=dataset_size,
          evaluate=evaluate2, args=train_params, rng=rng,
          var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):
    """Attack a model with one cleverhans attack and print accuracies.

    Loads the test split of `dataset`, builds the model selected by `type`,
    generates adversarial examples for the first 256 test images with the
    attack selected by `attack_type`, drops examples where the attack
    produced NaNs or left the input unchanged, and prints the average
    L_inf distortion plus the accuracy on clean and adversarial inputs.

    :param type: 'End2End' builds the model with prepare_GBP_End2End; any
                 other value uses prepare_Resnet. (The name shadows the
                 builtin but is kept because it is part of the interface.)
    :param dataset: 'CIFAR10' or 'CIFAR100'; any other value loads SVHN
                    from "./Data/".
    :param attack_type: one of "FGM", "CWL2", "DF", "ENM", "FFA", "LBFGS",
                        "MEA", "MIM", "SMM", "SPSA", "VATM", "VAM".
    :raises ValueError: if `attack_type` is not one of the names above.
    """
    size = 256  # number of leading test examples attacked and evaluated
    eval_params = {'batch_size': 128}

    # ------------------------- Prepare the data -------------------------
    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
    else:
        # Every other dataset name falls through to SVHN.
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
    input_dim = 32  # identical for all three data sets

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        # Filled by modelBuilder with (input, logits) pairs, one pair per
        # call; the first pair is used for evaluation further down.
        input_output = []

        def modelBuilder(x, num_classes, dataset, type, sess, input_output):
            """Build the model on `x` and return its logits.

            The first call passes the session and the checkpoint directory
            with reuse=False; later calls only pass reuse=True.
            """
            prepare = (prepare_GBP_End2End if type == 'End2End'
                       else prepare_Resnet)
            if not input_output:
                _, tf_model = prepare(
                    num_classes, inputT=x, sess=sess,
                    checkpoint_dir='./{}_{}/'.format(dataset, type),
                    reuse=False)
            else:
                _, tf_model = prepare(num_classes, inputT=x, reuse=True)

            input_output.append(x)
            input_output.append(tf_model.logits)
            return tf_model.logits

        # create an attackable model for the cleverhans
        model = CallableModelWrapper(
            lambda placeholder: modelBuilder(placeholder, num_classes,
                                             dataset, type, sess,
                                             input_output),
            'logits')

        # Parameter sets shared by several attacks; each branch takes its
        # own copy so no attack can mutate another one's configuration.
        eps_box_params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        optimizer_params = {
            'confidence': 0.9,
            'batch_size': 128,
            'learning_rate': 0.005,
        }

        # TODO: check the configurations
        if attack_type == "FGM":
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "CWL2":
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = dict(optimizer_params)
        elif attack_type == "DF":
            attack = DeepFool(model, back='tf', sess=sess)
            params = {}
        elif attack_type == "ENM":
            # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = dict(optimizer_params)
        elif attack_type == "FFA":
            # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = dict(eps_box_params, eps_iter=0.005)
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "VATM":
            # NOTE(review): `vatm` is constructed like an attack class here;
            # confirm the imported name supports this call signature.
            attack = vatm(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = dict(eps_box_params)
        else:
            # The function has always raised here; the message used to claim
            # a fallback to FGM that never happened.
            raise ValueError(
                "Unrecognized attack type: {!r}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        clean_batch = x_test[:size]
        clean_labels = y_test[:size]
        adv_vals = sess.run(adv_x, feed_dict={x: clean_batch})

        # "adv_vals" may contain NaNs because of the failure of the attack,
        # and the input may not be perturbed at all for the same reason.
        # The clean batch is cast to the dtype of the attack output before
        # the equality test: the placeholder is float32, so comparing
        # against higher-precision inputs would never report "unchanged".
        per_example_axes = tuple(range(1, adv_vals.ndim))
        has_nan = np.isnan(adv_vals).any(axis=per_example_axes)
        unchanged = (adv_vals == clean_batch.astype(adv_vals.dtype)).all(
            axis=per_example_axes)
        keep = ~(has_nan | unchanged)

        # cleanings
        adv_vals_cleaned = adv_vals[keep]
        ori_cleaned = clean_batch[keep]
        y_cleaned = clean_labels[keep]

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(
            len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.abs(adv_vals_cleaned - ori_cleaned).max(
                axis=per_example_axes).mean()))

        # TODO: visualize the adv_vals

        # Build the softmax op once and reuse it for both evaluations.
        probs = tf.nn.softmax(input_output[1])

        accuracy = model_eval(sess, input_output[0], y, probs,
                              clean_batch, clean_labels, args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y, probs,
                              adv_vals_cleaned, y_cleaned, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
####################################
# MIM: craft momentum-iterative adversarial examples on the source model
# and measure how they affect both the source model and the base model.
print("\n\n")
print("MIM")

# Attack budget `eps` comes from the first command-line argument.
mim_params = dict(
    eps=float(sys.argv[1]),
    eps_iter=0.01,
    nb_iter=500,
    ord=np.inf,
    clip_min=0.,
    clip_max=1.,
)
mim_source = MomentumIterativeMethod(wrap_source, sess=sess)

# Adversarial examples are generated in chunks of 500 test indices and
# written into a preallocated array.
X_adv_source = np.zeros((len(indices_test), 32, 32, 3))
for i in np.arange(0, len(indices_test), 500):
    X_adv_source[i:i + 500] = mim_source.generate_np(
        X_test[indices_test[i:i + 500]],
        **mim_params
    )

print("metrics source model")
print(metrics(model_source, X_adv_source, X_test, pred_source,
              indices_test))

print("metrics base model")
print(metrics(model, X_adv_source, X_test, pred_base, indices_test))

# Predicted classes of both models on the adversarial examples.
pred_source_adv = np.argmax(model_source.predict(X_adv_source), axis=1)
pred_adv_basefromsource = np.argmax(model.predict(X_adv_source), axis=1)

agree_func(indices_test, pred_adv_basefromsource, pred_source_adv,
           pred_base, pred_source)