def so(model, x, y, steps, eps, alpha=0, norm="l2", sd=0.0):
    """Build a symbolic iterative attack that smooths the gradient by
    averaging it over noisy copies of the current iterate.

    model: callable mapping an input tensor to logits.
    x, y:  input and one-hot label placeholders/tensors.
    steps: number of outer attack iterations.
    eps:   perturbation budget (linf radius or l2 radius, per `norm`).
    alpha: per-step size; used only by the "linf" branch.
    norm:  "linf" or "l2". NOTE(review): any other value leaves `grad`
           unbound inside the inner loop (NameError) — confirm callers
           only pass these two values.
    sd:    stddev of the Gaussian noise added before each gradient eval.

    Returns the symbolic adversarial example, clipped to [0, 1].
    Relies on module-level tf / K / np / gen_grad / l2_norm.
    """
    adv_x = x
    for i in range(steps):
        # Average the loss gradient over 10 noisy samples around the iterate.
        total_grad = 0
        for j in range(10):
            # NOTE(review): a fixed op-level seed (42) is passed to every
            # noise op — confirm this yields the intended randomness.
            temp_adv_x = adv_x + tf.random_normal(stddev=sd, shape=tf.shape(x), seed=42)
            logits = model(temp_adv_x)
            if norm == "linf":
                grad = gen_grad(temp_adv_x, logits, y, loss='logloss')
            elif norm == "l2":
                grad = gen_grad(temp_adv_x, logits, y, loss='cw')
            total_grad += grad
        if norm == "linf":
            # Signed step, then project back into the linf eps-ball around x.
            normed_grad = K.sign(total_grad)
            adv_x += alpha * normed_grad
            adv_x = tf.clip_by_value(adv_x, x - eps, x + eps)
        if norm == "l2":
            # Normalized-gradient step of size 2.5*eps/steps, then project the
            # accumulated perturbation back onto the l2 ball of radius eps.
            # The 1e-8 floor guards against division by a zero-norm gradient.
            grad_norm = tf.clip_by_value(l2_norm(total_grad), 1e-8, np.inf)
            adv_x += 2.5 * eps / steps * total_grad / grad_norm
            dx = adv_x - x
            dx_norm = tf.clip_by_value(l2_norm(dx), 1e-8, np.inf)
            dx_final_norm = tf.clip_by_value(dx_norm, 0, eps)
            adv_x = x + dx_final_norm * dx / dx_norm
    # Keep the final iterate a valid image.
    adv_x = tf.clip_by_value(adv_x, 0, 1)
    return adv_x
def momentum_fgs(model, x, y, eps):
    """Momentum Iterative Method (MIM).

    Accumulates an L1-normalized gradient with decay 1.0 over 10 iterations,
    taking signed steps of eps/5 and projecting back into the linf eps-ball
    around x and the valid [0, 1] range after every step.

    Returns the symbolic adversarial example.
    """
    # Fixed attack hyper-parameters.
    n_steps = 10
    decay = 1.0
    step_size = eps / 5.0

    accum = tf.zeros_like(x)
    perturbed = x
    for _ in range(n_steps):
        cur_logits = model(perturbed)
        g = gen_grad(perturbed, cur_logits, y)

        # Normalize by the mean |grad| over all non-batch axes, guarding
        # against division by zero, then fold into the momentum buffer.
        reduce_axes = list(range(1, len(g.get_shape())))
        tiny = tf.cast(1e-12, g.dtype)
        g = g / tf.maximum(tiny, tf.reduce_mean(tf.abs(g), reduce_axes, keepdims=True))
        accum = decay * accum + g

        # Signed step, then project to the eps-ball and the valid image range.
        perturbed = perturbed + step_size * tf.sign(accum)
        perturbed = x + tf.clip_by_value(perturbed - x, -eps, eps)
        perturbed = tf.clip_by_value(perturbed, 0., 1.0)
        perturbed = K.stop_gradient(perturbed)
    return perturbed
def main(model_name, adv_model_names, model_type):
    """Adversarially train an MNIST model against FGSM/I-FGSM examples
    crafted on itself plus an ensemble of pre-trained models, then save it.

    Fix: the JSON file was opened with the invalid mode 'wr', which raises
    ValueError; write mode 'w' is the correct choice.
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(shape=(None, 28, 28, 1))
    y = K.placeholder(shape=(BATCH_SIZE, 10))

    eps = args.eps
    norm = args.norm

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    ens_str = ''
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])
        if len(adv_models) > 0:
            # Accumulate the ensemble-member indices into the save name.
            name = basename(adv_model_names[i])
            model_index = name.replace('model', '')
            ens_str += model_index

    model = model_mnist(type=model_type)

    # One adversarial-example source per ensemble member plus the model itself.
    x_advs = [None] * (len(adv_models) + 1)
    for i, m in enumerate(adv_models + [model]):
        if args.iter == 0:
            # Single-step FGSM.
            logits = m(x)
            grad = gen_grad(x, logits, y, loss='training')
            x_advs[i] = symbolic_fgs(x, grad, eps=eps)
        elif args.iter == 1:
            # Iterative FGSM.
            x_advs[i] = iter_fgs(m, x, y, steps=40, alpha=0.01, eps=args.eps)

    # Train an MNIST model
    tf_train(x, y, model, X_train, Y_train, data_gen, x_advs=x_advs,
             benign=args.ben)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)

    # Encode the training configuration into the saved model's name.
    model_name += '_' + str(eps) + '_' + str(norm) + '_' + ens_str
    if args.iter == 1:
        model_name += 'iter'
    if args.ben == 0:
        model_name += '_nob'
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)
def main(model_name, adv_model_names, model_type):
    """Adversarially train a model with randomized multi-step (PGD-style)
    examples crafted on itself and an ensemble of pre-trained models.

    Relies on module-level args / flags / FLAGS and project helpers
    (load_data, data_flow, model_select, gen_grad, tf_train, ...).
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_flags(32)
    # Let TF grow GPU memory on demand instead of reserving it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))
    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')
    flags.DEFINE_integer('type', args.type, 'model type')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = load_data()
    data_gen = data_flow(X_train)

    # NOTE(review): channels-first placeholder layout — confirm it matches
    # the data returned by load_data().
    x = K.placeholder(shape=(None,
                             FLAGS.NUM_CHANNELS,
                             FLAGS.IMAGE_ROWS,
                             FLAGS.IMAGE_COLS))
    y = K.placeholder(shape=(FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES))

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])

    model = model_select(type=model_type)

    # One adversarial-example source per ensemble member plus the model itself.
    x_advs = [None] * (len(adv_models) + 1)
    for i, m in enumerate(adv_models + [model]):
        # Random start inside the linf eps-ball, then args.k signed steps of
        # eps/4, projecting back to the ball and to [0, 1] each step.
        x_noise = x + tf.random_uniform(shape=[FLAGS.BATCH_SIZE,
                                               FLAGS.NUM_CHANNELS,
                                               FLAGS.IMAGE_ROWS,
                                               FLAGS.IMAGE_COLS],
                                        minval=-args.eps, maxval=args.eps)
        x_noise = tf.clip_by_value(x_noise, 0., 1.)
        for _ in range(args.k):
            logits = m(x_noise)
            grad = gen_grad(x_noise, logits, y, loss='logloss')
            x_noise = K.stop_gradient(x_noise + args.eps / 4.0 * K.sign(grad))
            x_noise = tf.clip_by_value(x_noise, x - args.eps, x + args.eps)
            x_noise = tf.clip_by_value(x_noise, 0., 1.)
        x_advs[i] = x_noise

    # Train an MNIST model
    tf_train(x, y, model, X_train, Y_train, data_gen, model_name,
             x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    with open(model_name + '_log.txt', 'a') as log:
        log.write('Test error: %.1f%%' % test_error)
    print('Test error: %.1f%%' % test_error)
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name+'.json', 'w') as f:
        f.write(json_string)
def iter_fgs(model, data, labels, steps, eps):
    """Iterative FGSM (I-FGSM).

    Starting from `data`, repeatedly recompute the gradient and take a
    single FGSM step of size `eps`, `steps` times. Returns the final
    symbolic adversarial example.
    """
    perturbed = data
    remaining = steps
    while remaining > 0:
        step_grad = gen_grad(perturbed, model, labels)
        perturbed = symbolic_fgs(perturbed, step_grad, eps)
        remaining -= 1
    return perturbed
def iter_fgs(model, x, y, steps, eps):
    """Iterative FGSM (I-FGSM) with per-step clipping enabled.

    Takes `steps` FGSM steps of size `eps`, recomputing the logits and
    gradient at each iterate. Returns the symbolic adversarial example.
    """
    perturbed = x
    for _ in range(steps):
        current_logits = model(perturbed)
        current_grad = gen_grad(perturbed, current_logits, y)
        perturbed = symbolic_fgs(perturbed, current_grad, eps, True)
    return perturbed
def iter_fgs(model, x, y, steps, eps, alpha):
    """PGD-style iterative FGSM.

    Takes `steps` FGSM steps of size `alpha`, projecting the iterate back
    into the linf ball of radius `eps` around `x` after every step.
    Returns the symbolic adversarial example.
    """
    perturbed = x
    for _ in range(steps):
        step_logits = model(perturbed)
        step_grad = gen_grad(perturbed, step_logits, y)
        perturbed = symbolic_fgs(perturbed, step_grad, alpha, True)
        # Project back into the eps-ball around the clean input.
        perturbed = tf.clip_by_value(perturbed, x - eps, x + eps)
    return perturbed
def main(model_name, adv_model_names, model_type):
    """Adversarially train an MNIST model with one-shot FGSM examples from
    itself plus an ensemble of pre-trained models, then save it.

    Fixes:
    - NUM_EPOCHS is an integer count, so it is registered with
      DEFINE_integer (matching the sibling main) instead of DEFINE_bool.
    - The JSON file was opened with the invalid mode 'wr' (ValueError);
      write mode 'w' is correct.
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_mnist_flags()
    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(shape=(None,
                             FLAGS.IMAGE_ROWS,
                             FLAGS.IMAGE_COLS,
                             FLAGS.NUM_CHANNELS))
    y = K.placeholder(shape=(FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES))

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])

    model = model_mnist(type=model_type)

    # One FGSM example source per ensemble member plus the model itself.
    x_advs = [None] * (len(adv_models) + 1)
    for i, m in enumerate(adv_models + [model]):
        logits = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    # Train an MNIST model
    tf_train(x, y, model, X_train, Y_train, data_gen, x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)

    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)
def iter_fgs(model, x, y, steps, alpha, eps, clipping=True):
    """Iterative FGSM with epsilon-ball projection.

    Takes `steps` FGSM steps of size `alpha`; after every step the total
    perturbation is clipped to [-eps, eps] and, when `clipping` is True,
    the iterate is clipped back to the valid [0, 1] range.
    Returns the symbolic adversarial example.
    """
    perturbed = x
    for _ in range(steps):
        cur_logits = model(perturbed)
        cur_grad = gen_grad(perturbed, cur_logits, y)
        perturbed = symbolic_fgs(perturbed, cur_grad, alpha, True)
        # Project the accumulated perturbation back into the eps-ball.
        delta = K.clip(perturbed - x, -eps, eps)
        perturbed = x + delta
        if clipping:
            perturbed = K.clip(perturbed, 0, 1)
    return perturbed
def main(adv_model_names):
    """Adversarially train a defense model against one-shot FGSM examples
    crafted on externally loaded (.npy checkpoint) source models.

    Relies on module-level FLAGS and project helpers (set_model_flags,
    load_model, gen_grad, symbolic_fgs, dataset, defense_model, tf_train,
    tf_test_error_rate).
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_model_flags()
    # Start from a clean graph so repeated invocations don't accumulate ops.
    tf.reset_default_graph()
    g = tf.get_default_graph()

    x = tf.placeholder(
        tf.float32,
        shape=[None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
    y = tf.placeholder(tf.float32, shape=[None, FLAGS.NUM_CLASSES])
    # Toggles train/eval behavior inside the model graph.
    train_mode = tf.placeholder(tf.bool)

    eps = FLAGS.EPS

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    # load from out source
    adv_models = [None] * len(adv_model_names)
    for i, name in enumerate(adv_model_names):
        adv_models[i] = load_model(name, path="./models/" + name + "-save.npy")

    # One FGSM example source per external model (the defended model itself
    # is handled inside tf_train here, unlike the sibling mains).
    x_advs = [None] * (len(adv_models))
    for i, m in enumerate(adv_models):
        # NOTE(review): these models return (logits, something) — the second
        # element is discarded; confirm against the model definition.
        logits, _ = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    data = dataset(FLAGS.DIR, normalize=False)
    sess, graph_dict = tf_train(g, x, y, data, defense_model, train_mode,
                                x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(sess, graph_dict, data, x_advs)
    print('Test error: %.1f%%' % test_error)
def white_box_fgsm_iter(prediction, target_model, x, logits, y, X_test,
                        X_test_ini, targets, targets_cat, eps, dim, beta):
    """Run an iterative white-box FGSM-style attack and report success rates
    and the average l2 perturbation.

    prediction / logits: symbolic model outputs fed by placeholder x.
    X_test / X_test_ini: (possibly pre-noised) inputs and their originals.
    targets / targets_cat: target labels (indices / one-hot).
    eps:  linf budget for the accumulated perturbation.
    beta: per-step size.

    NOTE(review): depends on module-level globals — args, sess,
    random_indices, BATCH_SIZE, BATCH_EVAL_NUM, NUM_CLASSES,
    CLIP_MIN/CLIP_MAX, wb_write_out — confirm they are set before calling.
    """
    # Get gradient from model: plain cross-entropy, or the Carlini-Wagner
    # margin loss (real vs. best-other logit) when loss_type == 'cw'.
    if args.loss_type == 'xent':
        grad = gen_grad(x, logits, y)
    elif args.loss_type == 'cw':
        real = tf.reduce_sum(y*logits, 1)
        # The -y*10000 term masks out the true class from the max.
        other = tf.reduce_max((1-y)*logits - (y*10000), 1)
        if '_un' in args.method:
            # Untargeted: push the true class below the runner-up.
            loss = tf.maximum(0.0, real-other+args.conf)
        else:
            # Targeted: push the target class above the rest.
            loss = tf.maximum(0.0, other-real+args.conf)
        grad, = tf.gradients(loss, x)

    # normalized gradient
    if args.norm == 'linf':
        normed_grad = tf.sign(grad)
    elif args.norm == 'l2':
        normed_grad = K.l2_normalize(grad, axis=(1, 2, 3))

    # Multiply by constant epsilon
    scaled_grad = beta * normed_grad

    # Add perturbation to original example to obtain adversarial example:
    # gradient ascent for untargeted xent, descent otherwise.
    if args.loss_type == 'xent':
        if '_un' in args.method:
            adv_x_t = tf.stop_gradient(x + scaled_grad)
        else:
            adv_x_t = tf.stop_gradient(x - scaled_grad)
    elif args.loss_type == 'cw':
        adv_x_t = tf.stop_gradient(x - scaled_grad)

    adv_x_t = tf.clip_by_value(adv_x_t, CLIP_MIN, CLIP_MAX)

    # Evaluate on the pre-sampled subset of the test set.
    X_test_mod = X_test[random_indices]
    X_test_ini_mod = X_test_ini[random_indices]
    X_adv_t = np.zeros_like(X_test_ini_mod)
    adv_pred_np = np.zeros((len(X_test_ini_mod), NUM_CLASSES))
    pred_np = np.zeros((len(X_test_ini_mod), NUM_CLASSES))
    for i in range(BATCH_EVAL_NUM):
        X_test_slice = X_test_mod[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        X_test_ini_slice = X_test_ini_mod[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        targets_cat_slice = targets_cat[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        X_adv_curr = X_test_slice
        # Iterate the one-step graph, re-projecting the total perturbation
        # (relative to the ORIGINAL images) into the eps-ball each step.
        for k in range(args.num_iter):
            X_adv_curr = sess.run(adv_x_t, feed_dict={x: X_adv_curr,
                                                      y: targets_cat_slice})
            r = X_adv_curr - X_test_ini_slice
            r = np.clip(r, -eps, eps)
            X_adv_curr = X_test_ini_slice + r
        X_adv_curr = np.clip(X_adv_curr, CLIP_MIN, CLIP_MAX)
        X_adv_t[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)] = X_adv_curr
        adv_pred_np_i = sess.run(prediction, feed_dict={x: X_adv_curr})
        adv_pred_np[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :] = adv_pred_np_i
        # Benign predictions on the same slice for the baseline success rate.
        pred_np_i, logits_np_i = sess.run([prediction, logits],
                                          feed_dict={x: X_test_slice})
        pred_np[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :] = pred_np_i

    # Success = fraction of adversarial predictions matching the targets;
    # for untargeted methods success is the complement.
    white_box_success = 100.0 * np.sum(
        np.argmax(adv_pred_np, 1) == targets) / (BATCH_SIZE*BATCH_EVAL_NUM)
    if '_un' in args.method:
        white_box_success = 100.0 - white_box_success
    benign_success = 100.0 * np.sum(
        np.argmax(pred_np, 1) == targets) / (BATCH_SIZE*BATCH_EVAL_NUM)
    print('Benign success: {}'.format(benign_success))

    # Mean l2 norm of the final perturbations, flattened to `dim` features.
    wb_norm = np.mean(np.linalg.norm(
        (X_adv_t-X_test_ini_mod).reshape(BATCH_SIZE*BATCH_EVAL_NUM, dim),
        axis=1))
    print('Average white-box l2 perturbation: {}'.format(wb_norm))

    wb_write_out(eps, white_box_success, wb_norm)
    return
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    """Craft adversarial examples on a GTSRB source model and report
    white-box and black-box (transfer) error rates.

    attack: one of "test", "fgs", "rand_fgs", "ifgs", "CW", "grad_ens".

    Fix: Python-2 `print` statements replaced with the function form,
    which is valid in both Python 2 and Python 3.
    """
    np.random.seed(0)
    tf.set_random_seed(0)
    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_gtsrb_flags()

    # Get GTSRB test data
    _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = tf.placeholder(tf.float32, (None, 32, 32, 1))
    y = tf.placeholder(tf.int32, (None))
    # NOTE(review): built but never consumed below — confirm intent.
    one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.3f}'.format(basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.3f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]
        cli = CarliniLi(K.get_session(), src_model,
                        targeted=False, confidence=args.kappa, eps=args.eps)
        X_adv = cli.attack(X_test, Y_test)
        # Re-project into the linf budget (CarliniLi may exceed it slightly).
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r
        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))
        return

    # cascade of FGSM steps over an ensemble of substitute models
    if attack == "grad_ens":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x, gradient,
                                     eps=args.eps / args.steps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(name), err))
def main(args):
    """Adversarially train a PyTorch MNIST model against FGSM examples from
    itself plus an ensemble of pre-trained models, then save its weights.

    Fix: the local variable `type` shadowed the builtin; renamed to
    `adv_type` (the `type=` keyword of load_model is part of its API and
    is unchanged).
    """
    def get_model_type(model_name):
        # Map known checkpoint names to architecture type ids.
        model_type = {
            'models/modelA': 0, 'models/modelA_adv': 0, 'models/modelA_ens': 0,
            'models/modelB': 1, 'models/modelB_adv': 1, 'models/modelB_ens': 1,
            'models/modelC': 2, 'models/modelC_adv': 2, 'models/modelC_ens': 2,
            'models/modelD': 3, 'models/modelD_adv': 3, 'models/modelD_ens': 3,
        }
        if model_name not in model_type.keys():
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Preprocess MNIST dataset
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../attack_mnist', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../attack_mnist', train=False,
                       transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_model_names = args.adv_models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_type = get_model_type(adv_model_names[i])
        adv_models[i] = load_model(adv_model_names[i], type=adv_type).to(device)

    model = model_mnist(type=args.type).to(device)
    optimizer = optim.Adam(model.parameters())

    # Train on MNIST model: per batch, craft one FGSM example per ensemble
    # member (plus the model itself) and hand them to the training step.
    x_advs = [None] * (len(adv_models) + 1)
    for epoch in range(args.epochs):
        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)
            for i, m in enumerate(adv_models + [model]):
                grad = gen_grad(data, m, labels, loss='training')
                x_advs[i] = symbolic_fgs(data, grad, eps=eps)
            train(epoch, batch_idx, model, data, labels, optimizer,
                  x_advs=x_advs)

    # Finally print the result
    correct = 0
    with torch.no_grad():
        for (data, labels) in test_loader:
            data, labels = data.to(device), labels.to(device)
            correct += test(model, data, labels)
    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))

    torch.save(model.state_dict(), args.model + '.pkl')
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples on an MNIST source model and report
    white-box and black-box (transfer) error rates.

    attack: one of "test", "fgs", "rand_fgs", "ifgs", "CW".

    Fix: Python-2 `print` statements replaced with the function form,
    which is valid in both Python 2 and Python 3.
    """
    np.random.seed(0)
    tf.set_random_seed(0)
    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder((None,
                       FLAGS.IMAGE_ROWS,
                       FLAGS.IMAGE_COLS,
                       FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]
        cli = CarliniLi(K.get_session(), src_model,
                        targeted=False, confidence=args.kappa, eps=args.eps)
        X_adv = cli.attack(X_test, Y_test)
        # Re-project into the linf budget.
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r
        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err))
        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(name), err))
def main(args):
    """Attack pretrained PyTorch MNIST models and report error rates.

    Fixes:
    - In the per-target report, `basename(target_model_names)` passed the
      whole list (TypeError); it now reports the current model's `name`.
    - For RAND+FGSM, `eps -= args.alpha` ran inside the per-batch loop, so
      the budget shrank cumulatively over the test set; it is now applied
      exactly once before the loop.
    - Local `type` renamed to avoid shadowing the builtin.
    """
    def get_model_type(model_name):
        # Map known checkpoint names to architecture type ids.
        model_type = {
            'models/modelA': 0, 'models/modelA_adv': 0, 'models/modelA_ens': 0,
            'models/modelB': 1, 'models/modelB_adv': 1, 'models/modelB_ens': 1,
            'models/modelC': 2, 'models/modelC_adv': 2, 'models/modelC_ens': 2,
            'models/modelD': 3, 'models/modelD_adv': 3, 'models/modelD_ens': 3,
        }
        if model_name not in model_type.keys():
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Preprocess MNIST dataset
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../attack_mnist', train=False,
                       transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # source model for crafting adversarial examples
    src_model_name = args.src_model
    src_type = get_model_type(src_model_name)
    src_model = load_model(src_model_name, src_type).to(device)

    # model(s) to target
    target_model_names = args.target_models
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        tgt_type = get_model_type(target_model_names[i])
        target_models[i] = load_model(target_model_names[i],
                                      type=tgt_type).to(device)

    attack = args.attack

    # simply compute test error
    if attack == 'test':
        correct_s = 0
        with torch.no_grad():
            for (data, labels) in test_loader:
                data, labels = data.to(device), labels.to(device)
                correct_s += test(src_model, data, labels)
        err = 100. - 100. * correct_s / len(test_loader.dataset)
        print('Test error of {}: {:.2f}'.format(basename(src_model_name),
                                                err))
        for (name, target_model) in zip(target_model_names, target_models):
            correct_t = 0
            with torch.no_grad():
                for (data, labels) in test_loader:
                    data, labels = data.to(device), labels.to(device)
                    correct_t += test(target_model, data, labels)
            err = 100. - 100. * correct_t / len(test_loader.dataset)
            print('Test error of {}: {:.2f}'.format(basename(name), err))
        return

    eps = args.eps
    # Apply the RAND+FGSM budget reduction once, not per batch.
    if attack == 'rand_fgs':
        eps -= args.alpha

    correct = 0
    for (data, labels) in test_loader:
        # take the random step in the RAND+FGSM
        if attack == 'rand_fgs':
            data = torch.clamp(
                data + torch.zeros_like(data).uniform_(-args.alpha,
                                                       args.alpha),
                0.0, 1.0)
        data, labels = data.to(device), labels.to(device)
        grad = gen_grad(data, src_model, labels)

        # FGSM and RAND+FGSM one-shot attack
        if attack in ['fgs', 'rand_fgs']:
            adv_x = symbolic_fgs(data, grad, eps=eps)

        # iterative FGSM
        if attack == 'ifgs':
            adv_x = iter_fgs(src_model, data, labels, steps=args.steps,
                             eps=args.eps / args.steps)

        correct += test(src_model, adv_x, labels)

    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    """Craft adversarial examples on a GTSRB source model with single-model
    and ensemble attacks, and report white-box / transfer error rates.

    attack: "test", "fgs", "rand_fgs", "ifgs", "CW", "cascade_ensemble",
    "Iter_Casc", "stack_paral", or "cascade_ensemble_2".

    Fixes:
    - The Iter_Casc step-size schedule assigned into an empty list
      (`eps_all[0] = ...` -> IndexError) and reset its accumulator inside
      the loop so only the last term was summed. It now uses the closed
      form eps_all[i] = (eps - sum(eps_all[:i])) / n_models, which matches
      the expansion spelled out in the (removed) commented-out code.
    - Python-2 `print` statements replaced with the function form.
    """
    np.random.seed(0)
    tf.set_random_seed(0)
    set_gtsrb_flags()

    # Get GTSRB test data
    _, _, _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = K.placeholder((None,
                       FLAGS.IMAGE_ROWS,
                       FLAGS.IMAGE_COLS,
                       FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.3f}'.format(basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.3f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:200]
        Y_test = Y_test[0:200]
        cli = CarliniLi(K.get_session(), src_model,
                        targeted=False, confidence=args.kappa, eps=args.eps)
        X_adv = cli.attack(X_test, Y_test)
        # Re-project into the linf budget.
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r
        np.save('Train_Carlini_200.npy', X_adv)
        np.save('Label_Carlini_200.npy', Y_test)
        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))
        display_leg_adv_sample(X_test, X_adv)
        return

    # cascade: FGSM steps cycling over substitutes plus the source model
    if attack == "cascade_ensemble":
        sub_model_ens = (sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x, gradient,
                                     eps=args.eps / args.steps, clipping=True)

    # iterative cascade: each step follows the currently-strongest substitute
    if attack == "Iter_Casc":
        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [None] * len(sub_models)
        errs = [None] * len(sub_models)
        adv_x = x

        # Geometric step-size schedule: each step spends 1/n of the budget
        # that remains after the previous steps, i.e.
        # eps_all[i] = (eps - sum(eps_all[:i])) / len(sub_models).
        eps_all = []
        for i in range(args.steps):
            eps_all.append((args.eps - sum(eps_all)) * (1.0 / len(sub_models)))

        for j in range(args.steps):
            print('iterative step is :', j)
            if j == 0:
                # Probe every substitute and keep the weakest direction
                # (lowest induced error) as the starting iterate.
                for i, m in enumerate(sub_models):
                    logits = m(adv_x)
                    gradient = gen_grad(adv_x, logits, y)
                    adv_x_ = symbolic_fgs(adv_x, gradient, eps=eps_all[j],
                                          clipping=True)
                    x_advs[i] = adv_x_
                    X_adv = batch_eval([x, y], [adv_x_],
                                       [X_test, Y_test])[0]
                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
                adv_x = x_advs[errs.index(min(errs))]
            else:
                # Step against the substitute that is currently hardest
                # to fool, then re-measure all substitutes.
                t = errs.index(min(errs))
                print('index of min value of errs:', t)
                logits = sub_models[t](adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x, gradient, eps=eps_all[j],
                                     clipping=True)
                for i, m in enumerate(sub_models):
                    X_adv = batch_eval([x, y], [adv_x],
                                       [X_test, Y_test])[0]
                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
            print('error rate of each substitute models_oldest: ', errs)
            print('\t')
            # Stop early once every substitute is (almost) fully fooled.
            if min(errs) >= 99:
                success_rate = sum(errs) / len(sub_models)
                print('success rate is: {:.3f}'.format(success_rate))
                break

        success_rate = sum(errs) / len(sub_models)
        print('success rate is: {:.3f}'.format(success_rate))

        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
        np.save('results/iter_casc_0.2_leg_adv/X_adv_Iter_Casc_0.2.npy',
                X_adv)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        save_leg_adv_sample('results/iter_casc_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/iter_casc_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)
        return

    # parallel stacking: average per-substitute FGSM examples, then take a
    # final FGSM step on the source model
    if attack == "stack_paral":
        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        errs = [None] * (len(sub_models) + 1)
        x_advs = [None] * len(sub_models)

        for i, m in enumerate(sub_models):
            logits = m(x)
            gradient = gen_grad(x, logits, y)
            adv_x = symbolic_fgs(x, gradient, eps=args.eps / 2, clipping=True)
            x_advs[i] = adv_x

        # Average the per-substitute examples.
        adv_x_sum = x_advs[0]
        for i in range(len(sub_models)):
            if i == 0:
                continue
            adv_x_sum = adv_x_sum + x_advs[i]
        adv_x_mean = adv_x_sum / (len(sub_models))

        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean, grads, eps=args.eps, clipping=True)

        # compute the adversarial examples and evaluate
        for i, m in enumerate(sub_models + [src_model]):
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            err = tf_test_error_rate(m, x, X_adv, Y_test)
            errs[i] = err

        # compute success rate
        success_rate = sum(errs) / (len(sub_models) + 1)
        print('success rate is: {:.3f}'.format(success_rate))

        # compute transfer rate
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        # save adversarial examples
        np.save('results/stack_paral_0.2_leg_adv/X_adv_stack_paral_0.2.npy',
                X_adv)
        save_leg_adv_sample('results/stack_paral_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/stack_paral_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)
        return

    # cascade over substitutes, then average all intermediate iterates and
    # take a final FGSM step on the source model
    if attack == "cascade_ensemble_2":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv, gradient,
                                     eps=args.eps / args.steps, clipping=True)
                x_advs[j][i] = x_adv

        # Average all step/substitute iterates.
        adv_x_sum = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0:
                    continue
                adv_x_sum = adv_x_sum + x_advs[j][i]
        adv_x_mean = adv_x_sum / (args.steps * len(sub_models))

        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean, grads,
                             eps=args.eps / args.steps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(name), err))
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples against `src_model_name` and report
    white-box / black-box error rates.

    Args:
        attack: one of "test", "fgs", "rfgs", "pgd", "mim".
        src_model_name: path of the source (surrogate) model checkpoint.
        target_model_names: paths of black-box target model checkpoints.

    Side effects: builds a TF session, prints accuracy numbers
    (reported as 100 - error) to stdout.
    """
    np.random.seed(0)
    tf.set_random_seed(0)
    set_flags(20)

    # grow GPU memory on demand instead of grabbing it all up front
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    # channels-first placeholders — assumes the models were trained that
    # way (see FLAGS ordering); TODO confirm against set_flags()
    x = K.placeholder(
        (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = load_data()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), 100 - err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), 100 - err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM, and spend part of the
    # epsilon budget on it
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM (PGD)
    if attack == "pgd":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps, alpha=args.eps / 10.0)

    # momentum iterative FGSM
    if attack == 'mim':
        adv_x = momentum_fgs(src_model, x, y, eps=args.eps)

    print('start')
    # compute the adversarial examples and evaluate
    # NOTE: adv_x is unbound (NameError) if `attack` matched none of the
    # branches above — callers must pass a supported attack name
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('-----done----')

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), 100 - err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(name), 100 - err))
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples on MNIST or CIFAR-10 and log the
    (smoothed) accuracies of the source and target models.

    Args:
        attack: one of "test", "fgs", "rfgs", "pgd", "so".
        src_model_name: path of the source (surrogate) model checkpoint.
        target_model_names: paths of black-box target model checkpoints.

    Raises:
        ValueError: if args.dataset is not "mnist" or "cifar10".

    Side effects: builds a TF session, appends result lines to
    'attacks.txt', prints progress markers to stdout.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    # grow GPU memory on demand instead of grabbing it all up front
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.dataset == "mnist":
        # MNIST models use channels-last tensors
        K.set_image_data_format('channels_last')
        set_mnist_flags()
        x = K.placeholder(
            (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = data_mnist()
        # source model for crafting adversarial examples
        src_model = load_model_mnist(src_model_name)
        # noise stddev for the smoothed 'so' attack / smoothed evaluation
        sd = 0.7
    elif args.dataset == "cifar10":
        set_flags(20)
        # CIFAR-10 models use channels-first tensors
        K.set_image_data_format('channels_first')
        x = K.placeholder(
            (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = load_data()
        # source model for crafting adversarial examples
        src_model = load_model(src_model_name)
        sd = 100. / 255.
    else:
        # fail fast: otherwise x / src_model / sd are unbound and the
        # code below dies with a confusing NameError
        raise ValueError('unsupported dataset: {}'.format(args.dataset))

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test accuracy
    if attack == "test":
        acc = tf_test_acc(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), acc))
        for (name, target_model) in zip(target_model_names, target_models):
            acc = tf_test_acc(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), acc))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM, and spend part of the
    # epsilon budget on it
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM (PGD)
    if attack == "pgd":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=eps, alpha=eps / 10.0)

    # noise-averaged ("smoothed") iterative attack
    if attack == 'so':
        adv_x = so(src_model, x, y, steps=args.steps, eps=eps,
                   alpha=eps / 10.0, norm=args.norm, sd=sd)

    print('start')

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    # pdb.set_trace()
    print('-----done----')

    # white-box attack: accuracy of the source model on its own
    # adversarial examples, evaluated under smoothing noise
    acc = tf_test_acc(src_model, x, X_adv, Y_test, sd=sd, num_iter=10)
    with open('attacks.txt', 'a') as log:
        log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
            basename(src_model_name), basename(src_model_name), acc, eps))

    # black-box attack: transfer to each target model
    for (name, target_model) in zip(target_model_names, target_models):
        acc = tf_test_acc(target_model, x, X_adv, Y_test, sd=sd, num_iter=10)
        with open('attacks.txt', 'a') as log:
            log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
                basename(src_model_name), basename(name), acc, eps))
def white_box_fgsm_iter(prediction, target_model, x, logits, y, X_test,
                        X_test_ini, targets, targets_cat, eps, dim, beta):
    """Run an iterative FGSM white-box attack in mini-batches and report
    the resulting error rate and mean L2 perturbation.

    Builds a one-step symbolic update (cross-entropy or CW loss, L-inf or
    L2 normalized gradient, step size `beta`), then applies it
    args.num_iter times per batch, projecting back into the eps-ball
    around X_test_ini after every step. Prints the average L2
    perturbation and hands the stats to wb_write_out().
    """
    # --- build the symbolic gradient for the chosen loss ---
    if args.loss_type == 'xent':
        grad = gen_grad(x, logits, y)
    elif args.loss_type == 'cw':
        # Carlini-Wagner margin loss: gap between the (target) class
        # logit and the best other logit, floored at -conf
        real = tf.reduce_sum(y * logits, 1)
        other = tf.reduce_max((1 - y) * logits - (y * 10000), 1)
        if '_un' in args.method:
            loss = tf.maximum(0.0, real - other + args.conf)
        else:
            loss = tf.maximum(0.0, other - real + args.conf)
        grad = K.gradients(loss, [x])[0]

    # --- normalize the gradient per the chosen norm ---
    if args.norm == 'linf':
        normed_grad = K.sign(grad)
    elif args.norm == 'l2':
        normed_grad = K.l2_normalize(grad, axis=(1, 2, 3))

    # Multiply by constant epsilon (per-iteration step size)
    scaled_grad = beta * normed_grad

    # --- one symbolic attack step ---
    # untargeted xent ascends the loss; targeted xent and CW descend it
    if args.loss_type == 'xent' and '_un' in args.method:
        adv_x_t = K.stop_gradient(x + scaled_grad)
    else:
        adv_x_t = K.stop_gradient(x - scaled_grad)
    adv_x_t = K.clip(adv_x_t, CLIP_MIN, CLIP_MAX)

    n_eval = BATCH_SIZE * BATCH_EVAL_NUM
    X_test_ini_mod = X_test_ini[:n_eval]
    # retained for the commented-out tf_test_error_rate check below
    targets_cat_mod = targets_cat[:n_eval]
    targets_mod = targets[:n_eval]

    sess = K.get_session()
    X_adv_t = np.zeros_like(X_test_ini_mod)
    for b in range(BATCH_EVAL_NUM):
        lo, hi = b * BATCH_SIZE, (b + 1) * BATCH_SIZE
        start_batch = X_test[lo:hi]
        ini_batch = X_test_ini[lo:hi]
        target_batch = targets_cat[lo:hi]

        X_adv_curr = start_batch
        for _ in range(args.num_iter):
            # one gradient step...
            X_adv_curr = sess.run([adv_x_t],
                                  feed_dict={x: X_adv_curr,
                                             y: target_batch})[0]
            # ...then project back into the eps-ball around the
            # original (pre-random-step) images
            r = np.clip(X_adv_curr - ini_batch, -eps, eps)
            X_adv_curr = ini_batch + r
        X_adv_t[lo:hi] = np.clip(X_adv_curr, CLIP_MIN, CLIP_MAX)

    adv_pred_np = sess.run([prediction], feed_dict={x: X_adv_t})[0]

    # _, _, white_box_error = tf_test_error_rate(target_model, x, X_adv_t, targets_cat_mod)
    mismatch = np.argmax(adv_pred_np, 1) != targets_mod
    white_box_error = 100.0 * np.sum(mismatch) / adv_pred_np.shape[0]
    if '_un' not in args.method:
        # targeted attack: success means matching the target label
        white_box_error = 100.0 - white_box_error

    wb_norm = np.mean(np.linalg.norm(
        (X_adv_t - X_test_ini_mod).reshape(n_eval, dim), axis=1))
    print('Average white-box l2 perturbation: {}'.format(wb_norm))

    wb_write_out(eps, white_box_error, wb_norm)
    return
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples against an MNIST source model and
    evaluate white-box / black-box transfer error rates.

    Args:
        attack: one of "test", "fgs", "rand_fgs", "ifgs", "CW",
            "cascade_ensemble", "parallel_ensemble".
        src_model_name: path of the source (surrogate) model checkpoint.
        target_model_names: paths of black-box target model checkpoints.

    Side effects: registers a BATCH_SIZE flag, builds TF graph/session
    state, prints error rates to stdout.

    NOTE: converted the remaining Python-2 `print` statements to the
    function form used by the rest of this file (output-identical for
    single-argument calls, and Python-3 compatible).
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    # MNIST placeholders are channels-last
    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.3f}'.format(basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.3f}'.format(basename(name), err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM, and spend part of the
    # epsilon budget on it
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack (evaluated on the first 1000 samples only)
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(), src_model,
                        targeted=False, confidence=args.kappa, eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)
        # project the CW result back into the L-inf eps-ball
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))
        return

    if attack == "cascade_ensemble":
        # random start, as in RAND+FGSM
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3, sub_model_4,
                         sub_model_5, sub_model_6, sub_model_7)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        # chain one small FGS step through every sub-model (and the
        # source model) per outer step
        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x, gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "parallel_ensemble":
        # random start, as in RAND+FGSM
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        print(x_advs)

        # collect every intermediate adversarial tensor from the cascade
        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv, gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv
        print(x_advs)

        # average all intermediates, then take a final FGS step on the
        # source model from that mean
        adv_x_mean = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0:
                    continue
                adv_x_mean = adv_x_mean + x_advs[j][i]
        xadv = adv_x_mean / (args.steps * len(sub_models))

        preds = src_model(xadv)
        grads = gen_grad(xadv, preds, y)
        adv_x = symbolic_fgs(xadv, grads, eps=args.eps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(name), err))