def data_setup():
    args = gv.args
    if 'MNIST' in args.dataset:
        X_train, Y_train, X_test, Y_test = data_mnist()
        Y_test_uncat = np.argmax(Y_test, axis=1)
        print('Loaded f/MNIST data')
    elif args.dataset == 'CIFAR-10':
        (X_train, Y_train_uncat), (X_test, Y_test_uncat) = cifar10.load_data()
        # cifar10.load_data() returns labels of shape (N, 1); flatten them
        Y_test_uncat = Y_test_uncat.reshape(-1)
        # Convert class vectors to binary class matrices.
        Y_train = np_utils.to_categorical(Y_train_uncat, gv.NUM_CLASSES)
        Y_test = np_utils.to_categorical(Y_test_uncat, gv.NUM_CLASSES)
        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        # subtract the mean image and normalize
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_test -= mean_image
        X_train /= 128.
        X_test /= 128.
        print('Loaded CIFAR-10 data')
    elif args.dataset == 'census':
        X_train, Y_train, X_test, Y_test = data_census()
        Y_test_uncat = np.argmax(Y_test, axis=1)
        print('Loaded Census data')
    return X_train, Y_train, X_test, Y_test, Y_test_uncat
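# Hypothetical usage sketch: `gv` is assumed to expose the parsed CLI args
# (e.g. gv.args.dataset in {'MNIST', 'f-MNIST', 'CIFAR-10', 'census'}).
# Note that data_setup() raises NameError for any other dataset value,
# since none of the returned variables get bound before the return.
X_train, Y_train, X_test, Y_test, Y_test_uncat = data_setup()
print('train: {}, test: {}'.format(X_train.shape, X_test.shape))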
def main():
    x = K.placeholder((None, 28, 28, 1))
    model = load_model(args.model)
    logits = model(x)
    prediction = K.softmax(logits)

    if args.mnist:
        _, _, X, Y = data_mnist(one_hot=False)
        accuracy = get_accuracy(X, Y, prediction, x)
        output_results(accuracy)
    elif args.n is None:
        with np.load(args.dataset) as data:
            X = data['drawings'] / 255
            Y = data['Y'].reshape(-1)
        accuracy = get_accuracy(X, Y, prediction, x)
        output_results(accuracy)
    else:
        result = []
        for i in tqdm(range(1, args.n + 1)):
            with np.load(args.dataset % i) as data:
                X = data['drawings'] / 255
                Y = data['Y'].reshape(-1)
            predictions = K.get_session().run(
                [prediction], feed_dict={x: X, K.learning_phase(): 0})[0]
            argmax = np.argmax(predictions, axis=1)
            # for untargeted attacks, a changed prediction counts as success
            if args.attack and not args.targeted:
                equal = argmax != Y
            else:
                equal = argmax == Y
            accuracy = np.mean(equal)
            result.append(accuracy)
        print(result)
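# `get_accuracy` is not defined in this file; a minimal sketch of the assumed
# helper, mirroring the batched evaluation in the `args.n` branch above:
def get_accuracy(X, Y, prediction, x):
    # run the softmax output in test mode and compare argmax to the labels
    preds = K.get_session().run(
        [prediction], feed_dict={x: X, K.learning_phase(): 0})[0]
    return np.mean(np.argmax(preds, axis=1) == Y)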
def main(attack, src_model_name, target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    dim = 28 * 28 * 1
    x = K.placeholder((None, 28, 28, 1))
    y = K.placeholder((None, 10))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(target_model_name), err))
        return

    if args.targeted_flag == 1:
        # draw a random target class different from the true class
        targets = []
        allowed_targets = list(range(10))
        for i in range(len(Y_test)):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(10))
        targets = np.array(targets)
        targets_cat = np_utils.to_categorical(targets, 10).astype(np.float32)
        Y_test = targets_cat

    logits = src_model(x)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
        assert grad is not None
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attacks
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            assert grad is not None
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply FGSM with a small step size
            for i in range(args.num_iter):
                adv_logits = src_model(adv_x)
                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad
                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                # project back into the eps-ball around x
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r
            adv_x = K.clip(adv_x, 0, 1)

        print('Generating adversarial samples')
        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

        avg_l2_perturb = np.mean(np.linalg.norm(
            (X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv, Y_test[0:l])
        if args.targeted_flag == 1:
            err = 100.0 - err
        print('{}->{}: {:.1f}'.format(src_model_name, src_model_name, err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name, basename(target_model_name), err,
                avg_l2_perturb, eps, attack))
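# `symbolic_fgs` and `symbolic_fg` are imported helpers; a hedged sketch of
# what they plausibly compute, given how they are called above (one-shot
# linf and l2 gradient steps), not necessarily the repo's exact code:
def symbolic_fgs(x, grad, eps=0.3, clipping=True):
    # linf step: move each pixel by eps in the sign of the loss gradient
    adv_x = K.stop_gradient(x + eps * K.sign(grad))
    if clipping:
        adv_x = K.clip(adv_x, 0, 1)
    return adv_x

def symbolic_fg(x, grad, eps=0.3):
    # l2 step: move eps along the per-sample unit gradient direction
    norm = K.sqrt(K.sum(K.square(grad), axis=(1, 2, 3), keepdims=True))
    return K.stop_gradient(x + eps * grad / (norm + 1e-12))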
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step for RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attacks
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(), src_model, targeted=False,
                        confidence=args.kappa, eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err))
        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(name), err))
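# `iter_fgs` is imported, not defined here; a hedged sketch consistent with
# the call above (not necessarily the repo's exact implementation): apply
# the symbolic FGSM step `steps` times, recomputing the gradient each time.
def iter_fgs(model, x, y, steps, eps):
    adv_x = x
    for _ in range(steps):
        logits = model(adv_x)
        grad = gen_grad(adv_x, logits, y)
        # one FGSM step of size eps, clipped to the valid pixel range
        adv_x = symbolic_fgs(adv_x, grad, eps, True)
    return adv_x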
def main(target_model_name, target=None):
    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder((None, IMAGE_ROWS, IMAGE_COLS, NUM_CHANNELS))
    y = K.placeholder((None, NUM_CLASSES))

    dim = int(IMAGE_ROWS * IMAGE_COLS)

    _, _, X_test_ini, Y_test = data_mnist()
    print('Loaded data')
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    if '_un' in args.method:
        targets = np.argmax(Y_test[:BATCH_SIZE * BATCH_EVAL_NUM], 1)
    elif RANDOM is False:
        targets = np.array([target] * (BATCH_SIZE * BATCH_EVAL_NUM))
    elif RANDOM is True:
        # draw a random target class different from the true class
        targets = []
        allowed_targets = list(range(NUM_CLASSES))
        for i in range(BATCH_SIZE * BATCH_EVAL_NUM):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(NUM_CLASSES))
        targets = np.array(targets)

    targets_cat = np_utils.to_categorical(targets, NUM_CLASSES).astype(np.float32)

    if args.norm == 'linf':
        eps_list = [0.3]
    elif args.norm == 'l2':
        eps_list = list(np.linspace(0.0, 2.0, 5))
        eps_list.extend(np.linspace(2.5, 9.0, 14))
    print(eps_list)

    # random initial step (RAND+FGSM-style) before the attack proper
    curr_len = len(X_test_ini)
    random_perturb = np.random.randn(*X_test_ini.shape)
    if args.norm == 'linf':
        random_perturb_signed = np.sign(random_perturb)
        X_test = np.clip(X_test_ini + args.alpha * random_perturb_signed,
                         CLIP_MIN, CLIP_MAX)
    elif args.norm == 'l2':
        random_perturb_unit = random_perturb / np.linalg.norm(
            random_perturb.reshape(curr_len, dim), axis=1)[:, None, None, None]
        X_test = np.clip(X_test_ini + args.alpha * random_perturb_unit,
                         CLIP_MIN, CLIP_MAX)

    for eps in eps_list:
        if '_iter' in args.method:
            white_box_fgsm_iter(prediction, target_model, x, logits, y,
                                X_test, X_test_ini, Y_test_uncat, targets,
                                targets_cat, eps, dim, args.beta)
            estimated_grad_attack_iter(X_test, X_test_ini, x, targets,
                                       prediction, logits, eps, dim, args.beta)
        else:
            white_box_fgsm(prediction, target_model, x, logits, y,
                           X_test, X_test_ini, Y_test_uncat, targets,
                           targets_cat, eps, dim)
def main(target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    dim = int(FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS)

    _, _, X_test, Y_test = data_mnist()
    print('Loaded data')

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    Y_test_uncat = np.argmax(Y_test, 1)

    means, class_frac = class_means(X_test, Y_test_uncat)
    scales, mean_dists, closest_means = length_scales(X_test, Y_test_uncat)

    if args.norm == 'linf':
        eps_list = list(np.linspace(0.0, 0.1, 5))
        eps_list.extend(np.linspace(0.2, 0.5, 7))
    elif args.norm == 'l2':
        eps_list = list(np.linspace(0.0, 9.0, 28))

    for eps in eps_list:
        eps_orig = eps
        # split the budget between the random step (alpha) and the
        # mean-difference step (eps)
        if args.alpha > eps:
            alpha = eps
            eps = 0
        else:
            alpha = args.alpha
            eps -= args.alpha

        adv_success = 0.0
        avg_l2_perturb = 0.0

        for i in range(FLAGS.NUM_CLASSES):
            curr_indices = np.where(Y_test_uncat == i)
            X_test_ini = X_test[curr_indices]
            Y_test_curr = Y_test_uncat[curr_indices]
            curr_len = len(X_test_ini)

            if args.targeted_flag == 1:
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
                allowed_targets.remove(i)

            # random initial step
            random_perturb = np.random.randn(*X_test_ini.shape)
            if args.norm == 'linf':
                random_perturb_signed = np.sign(random_perturb)
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_signed,
                                      CLIP_MIN, CLIP_MAX)
            elif args.norm == 'l2':
                random_perturb_unit = random_perturb / np.linalg.norm(
                    random_perturb.reshape(curr_len, dim),
                    axis=1)[:, None, None, None]
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_unit,
                                      CLIP_MIN, CLIP_MAX)

            # perturbation along the difference of class means
            if args.targeted_flag == 0:
                closest_class = int(closest_means[i])
                mean_diff_vec = means[closest_class] - means[i]
            elif args.targeted_flag == 1:
                targets = []
                mean_diff_array = np.zeros(
                    (curr_len, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS,
                     FLAGS.NUM_CHANNELS))
                for j in range(curr_len):
                    target = np.random.choice(allowed_targets)
                    targets.append(target)
                    mean_diff_array[j] = means[target] - means[i]

            if args.norm == 'linf':
                if args.targeted_flag == 0:
                    mean_diff_vec_signed = np.sign(mean_diff_vec)
                    perturb = eps * mean_diff_vec_signed
                elif args.targeted_flag == 1:
                    mean_diff_array_signed = np.sign(mean_diff_array)
                    perturb = eps * mean_diff_array_signed
            elif args.norm == 'l2':
                mean_diff_vec_unit = mean_diff_vec / np.linalg.norm(
                    mean_diff_vec.reshape(dim))
                perturb = eps * mean_diff_vec_unit

            X_adv = np.clip(X_test_curr + perturb, CLIP_MIN, CLIP_MAX)

            # norm of the overall perturbation
            perturb_norm = np.linalg.norm(
                (X_adv - X_test_ini).reshape(curr_len, dim), axis=1)
            perturb_norm_batch = np.mean(perturb_norm)
            avg_l2_perturb += perturb_norm_batch

            predictions_adv = K.get_session().run(
                [prediction],
                feed_dict={x: X_adv, K.learning_phase(): 0})[0]

            if args.targeted_flag == 0:
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) != Y_test_curr)
            elif args.targeted_flag == 1:
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) == np.array(targets))

        err = 100.0 * adv_success / len(X_test)
        avg_l2_perturb = avg_l2_perturb / FLAGS.NUM_CLASSES

        print('{}, {}, {}'.format(eps, alpha, err))
        print('{}'.format(avg_l2_perturb))
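# `class_means` and `length_scales` are assumed helpers; a possible sketch,
# consistent with their use above (hypothetical `_sketch` names to avoid
# shadowing the real imports): per-class mean images and class frequencies,
# plus the l2 distances between class means and each class's closest
# other class mean.
def class_means_sketch(X, y_uncat, num_classes=10):
    means = np.array([X[y_uncat == c].mean(axis=0) for c in range(num_classes)])
    class_frac = np.array([(y_uncat == c).mean() for c in range(num_classes)])
    return means, class_frac

def length_scales_sketch(X, y_uncat, num_classes=10):
    means, _ = class_means_sketch(X, y_uncat, num_classes)
    d = int(np.prod(X.shape[1:]))
    diffs = (means[:, None] - means[None, :]).reshape(num_classes, num_classes, d)
    mean_dists = np.linalg.norm(diffs, axis=-1)
    np.fill_diagonal(mean_dists, np.inf)  # ignore a class's distance to itself
    closest_means = np.argmin(mean_dists, axis=1)
    scales = mean_dists.min(axis=1)
    return scales, mean_dists, closest_means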
target_model_name = basename(args.target_model)

set_mnist_flags()

np.random.seed(0)
tf.set_random_seed(0)

x = K.placeholder(
    (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
y = K.placeholder((None, FLAGS.NUM_CLASSES))

dim = int(FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS)

_, _, X_test, Y_test = data_mnist()
print('Loaded data')

# target model for crafting adversarial examples
target_model = load_model(args.target_model)

logits = target_model(x)
prediction = K.softmax(logits)

sess = tf.Session()
print('Creating session')

targets = np.argmax(Y_test[:BATCH_SIZE * BATCH_EVAL_NUM], 1)
RANDOM = True
PCA_FLAG = False
if args.num_comp is not None:
    PCA_FLAG = True

if '_iter' in args.method:
    BATCH_EVAL_NUM = 10
else:
    BATCH_EVAL_NUM = 1

CLIP_MIN = 0.0

if args.dataset == 'MNIST':
    target_model_name = basename(args.target_model)
    _, _, X_test_ini, Y_test = data_mnist()
    X_test = X_test_ini
    print('Loaded data')
    IMAGE_ROWS = 28
    IMAGE_COLS = 28
    NUM_CHANNELS = 1
    NUM_CLASSES = 10
    CLIP_MAX = 1.0
    if args.norm == 'linf':
        eps_list = list(np.linspace(0.0, 0.1, 3))
        eps_list.extend(np.linspace(0.1, 0.5, 9))
        if "_iter" in args.method:
            eps_list = [0.3]
def main(target_model_name):
    redraw = False
    save_adv_samples_only = True

    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder((None, 28, 28, 1))
    y = K.placeholder((None, 10))

    dim = int(28 * 28 * 1)

    _, _, X_test, Y_test = data_mnist()
    print('Loaded data')

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    Y_test_uncat = np.argmax(Y_test, 1)

    means, class_frac = class_means(X_test, Y_test_uncat)
    scales, mean_dists, closest_means = length_scales(X_test, Y_test_uncat)

    eps_list = [args.eps]
    adv_images = np.empty((10 * 28, len(eps_list) * 28))
    total_X_adv = np.empty(X_test.shape)
    total_Y = np.empty(Y_test_uncat.shape)
    total_adv_pred = np.empty(Y_test_uncat.shape)
    total_adv_prob = np.empty(Y_test_uncat.shape)

    for eps_idx, eps in tqdm(enumerate(eps_list)):
        eps_orig = eps
        # split the budget between the random step (alpha) and the
        # mean-difference step (eps)
        if args.alpha > eps:
            alpha = eps
            eps = 0
        else:
            alpha = args.alpha
            eps -= args.alpha

        adv_success = 0.0
        avg_l2_perturb = 0.0
        NUM_SAVED = 0

        for i in tqdm(range(10)):
            curr_indices = np.where(Y_test_uncat == i)
            NUM_SAMPLES = len(curr_indices[0])
            X_test_ini = X_test[curr_indices]
            Y_test_curr = Y_test_uncat[curr_indices]
            curr_len = len(X_test_ini)

            if args.targeted_flag == 1:
                allowed_targets = list(range(10))
                allowed_targets.remove(i)

            # random initial step
            random_perturb = np.random.randn(*X_test_ini.shape)
            if args.norm == 'linf':
                random_perturb_signed = np.sign(random_perturb)
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_signed,
                                      CLIP_MIN, CLIP_MAX)
            elif args.norm == 'l2':
                random_perturb_unit = random_perturb / np.linalg.norm(
                    random_perturb.reshape(curr_len, dim),
                    axis=1)[:, None, None, None]
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_unit,
                                      CLIP_MIN, CLIP_MAX)

            # perturbation along the difference of class means
            if args.targeted_flag == 0:
                closest_class = int(closest_means[i])
                mean_diff_vec = means[closest_class] - means[i]
            elif args.targeted_flag == 1:
                targets = []
                mean_diff_array = np.zeros((curr_len, 28, 28, 1))
                for j in range(curr_len):
                    target = np.random.choice(allowed_targets)
                    targets.append(target)
                    mean_diff_array[j] = means[target] - means[i]

            if args.norm == 'linf':
                if args.targeted_flag == 0:
                    mean_diff_vec_signed = np.sign(mean_diff_vec)
                    perturb = eps * mean_diff_vec_signed
                elif args.targeted_flag == 1:
                    mean_diff_array_signed = np.sign(mean_diff_array)
                    perturb = eps * mean_diff_array_signed
            elif args.norm == 'l2':
                mean_diff_vec_unit = mean_diff_vec / np.linalg.norm(
                    mean_diff_vec.reshape(dim))
                perturb = eps * mean_diff_vec_unit

            X_adv = np.clip(X_test_curr + perturb, CLIP_MIN, CLIP_MAX)

            assert X_adv.shape[1:] == total_X_adv.shape[1:], (
                f'X_adv.shape[1:] = {X_adv.shape[1:]}, '
                f'total_X_adv.shape[1:] = {total_X_adv.shape[1:]}')
            assert X_adv.shape[0] == NUM_SAMPLES, \
                f'X_adv.shape[0] = {X_adv.shape[0]}'
            total_X_adv[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = X_adv

            if redraw:
                sample_x_adv = save_sample(X_adv, eps, i)
                sample_x_adv = draw(
                    images=sample_x_adv.reshape(
                        (1, sample_x_adv.shape[0], sample_x_adv.shape[1],
                         sample_x_adv.shape[2])),
                    n=10,
                    alpha=0.8,
                    background='000000')[0]
                row_start = i * 28
                col_start = eps_idx * 28
                no_channels_x = sample_x_adv.reshape(28, 28)
                adv_images[row_start:row_start + 28,
                           col_start:col_start + 28] = no_channels_x

            # norm of the overall perturbation
            perturb_norm = np.linalg.norm(
                (X_adv - X_test_ini).reshape(curr_len, dim), axis=1)
            perturb_norm_batch = np.mean(perturb_norm)
            avg_l2_perturb += perturb_norm_batch

            predictions_adv = K.get_session().run(
                [prediction],
                feed_dict={x: X_adv, K.learning_phase(): 0})[0]
            total_adv_pred[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = \
                np.argmax(predictions_adv, axis=1)
            total_adv_prob[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = \
                np.max(predictions_adv, axis=1)

            if args.targeted_flag == 0:
                adv_success += np.sum(np.argmax(predictions_adv, 1) != Y_test_curr)
                assert Y_test_curr.shape[1:] == total_Y.shape[1:], (
                    f'Y_test_curr.shape[1:] = {Y_test_curr.shape[1:]}, '
                    f'total_Y.shape[1:] = {total_Y.shape[1:]}')
                assert Y_test_curr.shape[0] == NUM_SAMPLES, \
                    f'Y_test_curr.shape[0] = {Y_test_curr.shape[0]}'
                total_Y[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = Y_test_curr
            elif args.targeted_flag == 1:
                targets_arr = np.array(targets)
                adv_success += np.sum(np.argmax(predictions_adv, 1) == targets_arr)
                assert targets_arr.shape[1:] == total_Y.shape[1:], (
                    f'targets_arr.shape[1:] = {targets_arr.shape[1:]}, '
                    f'total_Y.shape[1:] = {total_Y.shape[1:]}')
                assert targets_arr.shape[0] == NUM_SAMPLES, \
                    f'targets_arr.shape[0] = {targets_arr.shape[0]}'
                total_Y[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = targets_arr

            NUM_SAVED += NUM_SAMPLES

        err = 100.0 * adv_success / len(X_test)
        avg_l2_perturb = avg_l2_perturb / 10

        print(f'eps = {eps}, alpha = {alpha}, adv success = {err}')
        print(f'avg l2 perturb = {avg_l2_perturb}')

    if redraw:
        scipy.misc.imsave('baseline_attacks_redrawned.png', adv_images)
    else:
        file = os.path.join(
            ADVERSARIAL_DATA_PATH,
            f'baseline-norm-{args.norm}-alpha-{args.alpha}'
            f'-targeted-{args.targeted_flag}-adv-samples')
        np.savez(file=file, X=total_X_adv, Y=total_Y,
                 pred=total_adv_pred, prob=total_adv_prob)
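# For reference, the saved archive can be read back like this (a sketch;
# the path is assumed to match the f-string above, and np.savez appends
# the .npz suffix to it):
with np.load(file + '.npz') as data:
    X_adv = data['X']    # adversarial images
    Y = data['Y']        # true labels (or targets, if targeted)
    pred = data['pred']  # model predictions on X_adv
    prob = data['prob']  # softmax confidence of each prediction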
def main(attack, src_model_names, target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 1, 'Size of batches')
    set_mnist_flags()

    dim = FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model(s) for crafting adversarial examples
    src_models = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        src_models[i] = load_model(src_model_names[i])

    src_model_name_joint = ''
    for i in range(len(src_models)):
        src_model_name_joint += basename(src_model_names[i])

    # model(s) to target
    if target_model_name is not None:
        target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        for (name, src_model) in zip(src_model_names, src_models):
            _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), err))
        if target_model_name is not None:
            _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(target_model_name), err))
        return

    if args.targeted_flag == 1:
        pickle_name = attack + '_' + src_model_name_joint + '_' + \
            args.loss_type + '_targets.p'
        if os.path.exists(pickle_name):
            targets = pickle.load(open(pickle_name, 'rb'))
        else:
            # draw a random target class different from the true class
            targets = []
            allowed_targets = list(range(FLAGS.NUM_CLASSES))
            for i in range(len(Y_test)):
                allowed_targets.remove(Y_test_uncat[i])
                targets.append(np.random.choice(allowed_targets))
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
            targets = np.array(targets)
        targets_cat = np_utils.to_categorical(
            targets, FLAGS.NUM_CLASSES).astype(np.float32)
        Y_test = targets_cat
        if SAVE_FLAG:
            pickle.dump(Y_test, open(pickle_name, 'wb'))

    # take the random step for RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        curr_model = src_models[i]
        logits[i] = curr_model(x)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attacks
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply FGSM with a small step size
            for i in range(args.num_iter):
                adv_logits = [None] * len(src_model_names)
                for j in range(len(src_model_names)):
                    curr_model = src_models[j]
                    adv_logits[j] = curr_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                # project back into the eps-ball around x
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r
            adv_x = K.clip(adv_x, 0, 1)

        if attack == "CW_ens":
            l = 1000
            pickle_name = attack + '_' + src_model_name_joint + '_' + \
                str(args.eps) + '_adv.p'
            print(pickle_name)
            Y_test = Y_test[0:l]

            if os.path.exists(pickle_name):
                print('Loading adversarial samples')
                X_adv = pickle.load(open(pickle_name, 'rb'))

                for (name, src_model) in zip(src_model_names, src_models):
                    preds_adv, _, err = tf_test_error_rate(src_model, x, X_adv, Y_test)
                    print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                                  basename(name), err))

                preds_adv, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err))
                return

            X_test = X_test[0:l]
            time1 = time()
            cli = CarliniLiEns(K.get_session(), src_models, targeted=False,
                               confidence=args.kappa, eps=eps)

            X_adv = cli.attack(X_test, Y_test)
            r = np.clip(X_adv - X_test, -eps, eps)
            X_adv = X_test + r
            time2 = time()
            print("Run with Adam took {}s".format(time2 - time1))

            if SAVE_FLAG:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

            for (name, src_model) in zip(src_model_names, src_models):
                print('Carrying out white-box attack')
                preds, _, err = tf_test_error_rate(src_model, x, X_adv, Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(name), err))

            if target_model_name is not None:
                print('Carrying out black-box attack')
                preds, orig, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err))
            return

        pickle_name = attack + '_' + src_model_name_joint + '_' + \
            args.loss_type + '_' + str(eps) + '_adv.p'
        if args.targeted_flag == 1:
            pickle_name = attack + '_' + src_model_name_joint + '_' + \
                args.loss_type + '_' + str(eps) + '_adv_t.p'

        if os.path.exists(pickle_name):
            print('Loading adversarial samples')
            X_adv = pickle.load(open(pickle_name, 'rb'))
        else:
            print('Generating adversarial samples')
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            if SAVE_FLAG:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

        avg_l2_perturb = np.mean(np.linalg.norm(
            (X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        for (name, src_model) in zip(src_model_names, src_models):
            preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv, Y_test[0:l])
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}'.format(basename(name), basename(name), err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name_joint, basename(target_model_name), err,
                avg_l2_perturb, eps, attack))
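# `gen_grad_ens` and `gen_grad_cw` are imported loss-gradient generators;
# a hedged sketch consistent with their use above (sign conventions and the
# Keras backend calls are assumptions, not the repo's exact code):
# `gen_grad_ens` sums a cross-entropy loss over the ensemble's logits, and
# `gen_grad_cw` uses the Carlini-Wagner margin between the true-class logit
# and the best other logit.
def gen_grad_ens(x, logits_list, y):
    if not isinstance(logits_list, list):
        logits_list = [logits_list]
    loss = 0
    for logits in logits_list:
        loss += K.mean(K.categorical_crossentropy(y, logits, from_logits=True))
    grad = K.gradients(loss, [x])[0]
    return loss, grad

def gen_grad_cw(x, logits_list, y, kappa=0.0):
    if not isinstance(logits_list, list):
        logits_list = [logits_list]
    loss = 0
    for logits in logits_list:
        real = K.sum(y * logits, axis=1)                   # true-class logit
        other = K.max((1 - y) * logits - y * 1e4, axis=1)  # best other logit
        loss += K.mean(K.maximum(real - other + kappa, 0.0))
    # descend the margin loss so the step x + eps*sign(grad) pushes the
    # true-class logit below the best competitor
    grad = K.gradients(-loss, [x])[0]
    return grad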