def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']), sess)
        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], inception=inception,
            handpick=handpick, train=args['train'], seed=args['seed'])

        timestart = time.time()
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'], confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'], abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess, model, batch_size=args['batch_size'],
                           max_iterations=args['maxiter'], confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'], abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess, model, batch_size=args['batch_size'],
                           max_iterations=args['maxiter'], confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'], abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)

        # If untargeted, pass labels instead of targets.
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess, model, batch_size=args['batch_size'],
                         ord=np.inf, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess, model, batch_size=args['batch_size'],
                         ord=1, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess, model, batch_size=args['batch_size'],
                         ord=2, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess, model, batch_size=args['batch_size'],
                         ord=np.inf, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess, model, batch_size=args['batch_size'],
                         ord=1, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess, model, batch_size=args['batch_size'],
                         ord=2, inception=inception)
            adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['show']):
            if not os.path.exists(str(args['save']) + "/" + str(args['dataset']) +
                                  "/" + str(args['attack'])):
                os.makedirs(str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']))

        for i in range(0, len(inputs), args['batch_size']):
            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(np.reshape(model.model.predict(adv[j:j + 1]),
                                           (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            # Best case: the successful target with the smallest distortion.
            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum((adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(np.amax(np.abs(adv[dist_linf_index] - inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: one randomly chosen target from the batch.
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(model.model.predict(adv[rand_int:rand_int + 1]),
                                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r, 1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                r_average.append(1)
                d_average_l2.append(np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
            else:
                r_average.append(0)

            # Worst case: every target in the batch must be hit; track the
            # largest distortion among them.
            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum((adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(np.amax(np.abs(adv[dist_linf_index] - inputs[dist_linf_index])))
                r_worst.append(1)

            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(np.reshape(model.model.predict(inputs[j:j + 1]),
                                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(np.reshape(model.model.predict(adv[j:j + 1]),
                                                  (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id, adv_id,
                        adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))
                    show(inputs[j:j + 1],
                         str(args['save']) + "/" + str(args['dataset']) + "/" +
                         str(args['attack']) + "/original_{}.png".format(suffix))
                    show(adv[j:j + 1],
                         str(args['save']) + "/" + str(args['dataset']) + "/" +
                         str(args['attack']) + "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
def main(args):
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        image_id_set = np.random.choice(range(1000), args["image_number"] * 3,
                                        replace=False)
        # image_id_set = np.random.randint(1, 1000, args["image_number"])
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args['init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5, 0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        succ_count, ii, iii = 0, 0, 0
        final_distortion_count, first_iteration_count, first_distortion_count = [], [], []

        while iii < args["image_number"]:
            ii = ii + 1
            image_id = image_id_set[ii]
            # if image_id != 836: continue  # for test only

            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))

            ## orig_class: predicted label
            if arg_targeted_attack:  ### targeted attack
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1, 28, 28, 1), values in [-0.5, 0.5]
            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print("True label differs from the original prediction, skip!")
                continue
            else:
                iii = iii + 1

            print('\n', iii, '/', args["image_number"])

            ## parameters
            d = orig_img.size  # feature dimension
            print("dimension = ", d)
            # mu = 1 / d**2  # smoothing parameter
            q = arg_q + 0
            I = arg_max_iter + 0
            kappa = arg_kappa + 0
            const = arg_init_const + 0

            ## flatten image to a vector
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  ### initialized adv. perturbation
            # delta_adv = np.random.uniform(-16/255, 16/255, (1, d))

            ## w adv image initialization
            if args["constraint"] == 'uncons':
                # scale by 0.999999 to keep +-0.5 from mapping to +-infinity
                w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
                # in real values; note that orig_img_vec is in [-0.5, 0.5]
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) * 0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

            # ## test ##
            # for test_value in w_ori_img_vec[0, :]:
            #     if np.isnan(test_value) or np.isinf(test_value):
            #         print(test_value)

            # initialize the best solution & best loss
            best_adv_img = []  # successful adv image in [-0.5, 0.5]
            best_delta = []  # best perturbation
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(I)  ## I: max iterations
            l2s_loss_all = np.zeros(I)
            attack_flag = False
            first_flag = True  ## record the first successful attack

            # parameter setting for ZO gradient estimation
            mu = args["mu"]  ### smoothing parameter

            ## learning rate
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                ## parameter initialization for AdaMM
                v_init = 1e-7  # 0.00001
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))
                m = np.zeros((1, d))
                # momentum parameters for the first and second order moments
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            # for i in tqdm(range(I)):
            for i in range(I):
                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)

                ## total loss evaluation
                if args["constraint"] == 'uncons':
                    total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                        w_img_vec, kappa, target_label, const, model, orig_img,
                        arg_targeted_attack)
                else:
                    total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                        w_img_vec, kappa, target_label, const, model, orig_img,
                        arg_targeted_attack)

                ## gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                     target_label, const, model,
                                                     orig_img, arg_targeted_attack,
                                                     args["constraint"])
                elif arg_mode == "ZONES":
                    grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                       target_label, const, model,
                                                       orig_img, arg_targeted_attack,
                                                       args["constraint"])
                else:
                    grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                      target_label, const, model,
                                                      orig_img, arg_targeted_attack,
                                                      args["constraint"])

                # if np.remainder(i, 50) == 0:
                #     print("total loss:", total_loss[i])
                #     print(np.linalg.norm(grad_est, np.inf))

                ## ZO attack, unconstrained optimization formulation
                if arg_mode == "ZOSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if arg_mode == "ZOSCD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOAdaMM":
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### v_t
                    v_hat = np.maximum(v_hat, v)
                    # print(np.mean(v_hat))
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        tmp = delta_adv.copy()
                        # X_temp = orig_img_vec.reshape((-1, 1))
                        # V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1) + 1e-10))
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5, 0.5)
                        # delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                    # v_init = 1e-2  # 0.00001
                    # v = v_init * np.ones((1, d))
                    # m = np.zeros((1, d))
                    # # momentum parameters for the first and second order moments
                    # beta_1 = 0.9
                    # beta_2 = 0.99  # only used by AMSGrad
                    # m = beta_1 * m + (1 - beta_1) * grad_est
                    # v = np.maximum(beta_2 * v + (1 - beta_2) * np.square(grad_est), v)
                    # delta_adv = delta_adv - base_lr * m / np.sqrt(v + 1e-10)
                    # if args["constraint"] == 'cons':
                    #     V_temp = np.diag(np.sqrt(v.reshape(-1) + 1e-10))
                    #     X_temp = orig_img_vec.reshape((-1, 1))
                    #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    # delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        # V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        # X_temp = orig_img_vec.reshape((-1, 1))
                        delta_adv = projection_box(delta_adv, orig_img_vec, V_temp, -0.5, 0.5)
                if arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        # V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        # X_temp = orig_img_vec.reshape((-1, 1))
                        delta_adv = projection_box(delta_adv, orig_img_vec, V_temp, -0.5, 0.5)
                if arg_mode == "ZONES":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        # V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        # X = orig_img_vec.reshape((-1, 1))
                        delta_adv = projection_box(delta_adv, orig_img_vec, V_temp, -0.5, 0.5)

                # if arg_mode == "ZO-AdaFom":
                #     m = beta_1 * m + (1 - beta_1) * grad_est
                #     v = v * (float(i) / (i + 1)) + np.square(grad_est) / (i + 1)
                #     w_img_vec = w_img_vec - base_lr * m / np.sqrt(v)

                ### adv. example update
                w_img_vec = w_ori_img_vec + delta_adv

                ## convert back to adv_img in [-0.5, 0.5]
                if args["constraint"] == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
                else:
                    adv_img_vec = w_img_vec.copy()
                adv_img = np.resize(adv_img_vec, orig_img.shape)

                ## update the best solution over the iterations
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"], int(args["decay_lr"]),
                                   arg_mode, args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label, np.argmax(attack_prob)))
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"], int(args["decay_lr"]),
                                   arg_mode, args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label, np.argmax(attack_prob)))

                if arg_save_iteration:
                    os.system("mkdir Examples")
                    if (np.logical_or(true_label != np.argmax(attack_prob),
                                      np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                        suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                            image_id, arg_mode, true_label, np.argmax(attack_prob), i + 1)
                        # util.save_img(adv_img, "Examples/{}.png".format(suffix))

                if arg_targeted_attack:
                    if (np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10)
                            >= kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) < best_distortion):  # check distortion
                            # print('best distortion obtained at', i, '-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## record the first successful attack
                            if (first_flag):
                                first_flag = False
                                ### once set, later successful attacks are no longer recorded
                                first_adv_img = adv_img
                                first_distortion = distortion(adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
                else:
                    if (np.log(other_prob + 1e-10) - np.log(target_prob + 1e-10)
                            >= kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) < best_distortion):  # check distortion
                            # print('best distortion obtained at', i, '-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## record the first successful attack
                            if (first_flag):
                                first_flag = False
                                first_adv_img = adv_img
                                first_distortion = distortion(adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1

            if (attack_flag):
                # os.system("mkdir Results_SL")
                # ## best attack (final attack)
                # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, orig_class)  ## orig_class: predicted label
                # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, adv_class)
                # suffix3 = "id_{}_Mode_{}".format(image_id, arg_mode)
                # ### save original image
                # util.save_img(orig_img, "Results_SL/id_{}.png".format(image_id))
                # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
                # ### adv. image
                # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
                # ### adv. perturbation
                # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
                #
                # ## first attack
                # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, first_class)
                # ## first adv. image
                # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
                # ### first adv. perturbation
                # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

                ## save data
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0), id=image_id, mode=arg_mode,
                         loss=total_loss, perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         best_iteration=best_iteration,
                         learn_rate=args["lr"], decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                ## print
                print("It takes {} iterations to find the first attack".format(
                    first_iteration))
                # print(total_loss)
            else:
                ## save data
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0), id=image_id, mode=arg_mode,
                         loss=total_loss, perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"], decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")

            sys.stdout.flush()

        print('succ rate:', succ_count / args["image_number"])
        print('average first success l2', np.mean(first_distortion_count))
        print('average first itrs', np.mean(first_iteration_count))
        print('average l2:', np.mean(final_distortion_count),
              ' best l2:', np.min(final_distortion_count),
              ' worst l2:', np.max(final_distortion_count))
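# A minimal sketch of the two-point random gradient estimator that the
# `gradient_estimation_v2` call above presumably wraps (the actual estimator
# lives elsewhere in this repo and may differ; `loss_fn` here is a
# hypothetical scalar black-box objective):

import numpy as np

def zo_gradient_estimate(loss_fn, x, mu, q):
    """Estimate the gradient of loss_fn at x from 2 * q function queries.

    x: (1, d) flattened input; mu: smoothing radius; q: number of random
    direction vectors, matching arg_q above.
    """
    d = x.size
    f_x = loss_fn(x)
    grad = np.zeros_like(x)
    for _ in range(q):
        u = np.random.normal(size=x.shape)
        u /= np.linalg.norm(u)  # random direction on the unit sphere
        grad += (loss_fn(x + mu * u) - f_x) / mu * u
    return (d / q) * grad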
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

from setup_cifar import CIFAR, CIFARModel
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

BATCH_SIZE = 1

with tf.Session() as sess:
    # Pick one dataset/model pair; later assignments override earlier ones.
    # data, model = MNIST(), MNISTModel("models/mnist", sess)
    # data, model = CIFAR(), CIFARModel("models/cifar", sess)
    data, model = ImageNet(), InceptionModel(sess)

    x = tf.placeholder(tf.float32,
                       (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    r = []
    for i in range(0, len(data.test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]})
        # print(pred)
        # print('real', data.test_labels[i], 'pred', np.argmax(pred))
        r.append(np.argmax(pred, 1) == np.argmax(data.test_labels[i:i + BATCH_SIZE], 1))
    print(np.mean(r))
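# The loop above evaluates one image at a time (BATCH_SIZE = 1). For the
# smaller MNIST/CIFAR models the same accuracy can be computed in a single
# vectorized pass, assuming the graph accepts arbitrary batch sizes; a sketch
# (the function name is illustrative, not part of this repo):

def full_pass_accuracy(sess, x, y, data):
    """Top-1 accuracy over the whole test set in one feed."""
    preds = sess.run(y, {x: data.test_data})
    return np.mean(np.argmax(preds, 1) == np.argmax(data.test_labels, 1))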
def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                model = MNISTModel("models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel("models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'], 2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'],
            targeted=not args['untargeted'], target_num=args['targetnum'],
            inception=inception, train=args['train'], seed=args['seed'])

        timestart = time.time()
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(str(args['dataset']) + '_' + str(args['attack']) + '_train.npy')
            else:
                adv = np.load(str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'], confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'], abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'], confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'], abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'], confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'], abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)

            # If untargeted, pass labels instead of targets.
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess, model, batch_size=args['batch_size'],
                             ord=np.inf, eps=args['eps'], inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess, model, batch_size=args['batch_size'],
                             ord=1, eps=args['eps'], inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess, model, batch_size=args['batch_size'],
                             ord=2, eps=args['eps'], inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess, model, batch_size=args['batch_size'],
                              ord=np.inf, eps=args['eps'], inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess, model, batch_size=args['batch_size'],
                              ord=1, eps=args['eps'], inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess, model, batch_size=args['batch_size'],
                              ord=2, eps=args['eps'], inception=inception)
                adv = attack.attack(inputs, targets)
        timeend = time.time()

        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(str(args['dataset']) + '_' + str(args['attack']) + '_train.npy', adv)
            else:
                np.save(str(args['dataset']) + '_' + str(args['attack']) + '.npy', adv)

        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        # Transferability tests
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(str(args['save']) + "/" + str(args['dataset']) +
                                  "/" + str(args['attack'])):
                os.makedirs(str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']))

        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(np.reshape(model.model.predict(adv[j:j + 1]),
                                               (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                # Best case
                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum((adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                    d_best_linf.append(np.amax(np.abs(adv[dist_linf_index] - inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                # Average case
                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(model.model.predict(adv[rand_int:rand_int + 1]),
                                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
                else:
                    r_average.append(0)

                # Worst case
                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum((adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                    d_worst_linf.append(np.amax(np.abs(adv[dist_linf_index] - inputs[dist_linf_index])))
                    r_worst.append(1)

                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(np.reshape(model.model.predict(inputs[j:j + 1]),
                                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(np.reshape(model.model.predict(adv[j:j + 1]),
                                                      (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))
                        show(inputs[j:j + 1],
                             str(args['save']) + "/" + str(args['dataset']) + "/" +
                             str(args['attack']) + "/original_{}.png".format(suffix))
                        show(adv[j:j + 1],
                             str(args['save']) + "/" + str(args['dataset']) + "/" +
                             str(args['attack']) + "/adversarial_{}.png".format(suffix))

            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
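# The targeted/untargeted branches above all evaluate the same predicate;
# a small helper that captures it (a sketch, not a function in this repo):

def attack_succeeded(pred, target_onehot, untargeted):
    """True if a prediction counts as a success for the current attack mode."""
    pred_class = np.argmax(pred, 1)
    target_class = np.argmax(target_onehot, 1)
    return pred_class != target_class if untargeted else pred_class == target_class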
def main(args):
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"

        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model = MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            # data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')

        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')

        # load attack module
        if args['attack'] == "white":
            # batch size 1: optimize on 1 image at a time rather than optimizing images jointly
            attack = CarliniL2(sess, model, batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0, learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'], use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128: optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess, model, batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0, learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'], use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data, samples=args['numimg'], targeted=not args['untargeted'],
            start=args['firstimg'], inception=is_inception)
        print('Done...')
        # print('all_inputs : ', all_inputs.shape)
        # print('encoding_all : ', encoding_all.shape)

        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))

        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0

        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            # print('encoding_inputs shape: ', encoding_inputs)
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)

            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):", original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print("skip wrongly classified image no. {}, original class {}, classified as {}"
                      .format(i, np.argmax(labels), original_class[-1]))
                continue

            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1,) + adv.shape)
            timeend = time.time()

            l2_distortion = np.sum((adv - inputs)**2)**.5

            ##### llj: thermometer-encode the adversarial image channel by channel
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            channel0, channel1, channel2 = (encode_adv[:, 0, :, :],
                                            encode_adv[:, 1, :, :],
                                            encode_adv[:, 2, :, :])
            channel0, channel1, channel2 = (temp_encoder.tempencoding(channel0),
                                            temp_encoder.tempencoding(channel1),
                                            temp_encoder.tempencoding(channel2))
            encode_adv = np.concatenate([channel0, channel1, channel2], axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))
            #### llj

            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])

            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion

            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(inputs, "{}/{}/{}_original_{}.png".format(
                args['save'], args['dataset'], img_no, suffix))
            show(adv, "{}/{}/{}_adversarial_{}.png".format(
                args['save'], args['dataset'], img_no, suffix))
            show(adv - inputs, "{}/{}/{}_diff_{}.png".format(
                args['save'], args['dataset'], img_no, suffix))
            print("[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                  .format(img_no, i, all_true_ids[i], timeend - timestart, success,
                          const, original_class[-1], adversarial_class[-1],
                          l2_distortion, total_success / float(img_no),
                          0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        print(' origin accuracy : ', 100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
def main(args):
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], targeted=args['targeted'],
            start=0, inception=inception, handpick=handpick, seed=args['seed'])
        # print(true_ids)

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'], confidence=args['conf'],
                               targeted=args['targeted'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L2BB':
            # score-based ZO-ADMM attack
            attack = LADMMBB(sess, model, batch_size=args['batch_size'],
                             max_iterations=args['maxiter'], targeted=args['targeted'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'], ro=args['ro'],
                             abort_early=args['abort_early'], gama=args['gama'],
                             epi=args['epi'], alpha=args['alpha'])

        timestart = time.time()
        # adv = attack.attack(inputs, targets)
        adv, querycount, queryl2 = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['targeted']:
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids, querycount, queryl2)
        else:
            l2_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids, querycount, queryl2)
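# A hypothetical invocation sketch: `main` above takes a plain dict of parsed
# arguments, so a driver (argparse in the actual script) would call it roughly
# like this. The keys match the lookups above; the values are illustrative,
# not the repo's defaults:

if __name__ == "__main__":
    main({
        'dataset': 'mnist', 'attack': 'L2BB', 'numimg': 10, 'batch_size': 9,
        'maxiter': 1000, 'conf': 0, 'binary_steps': 9, 'iteration_steps': 9,
        'ro': 10.0, 'gama': 1.0, 'epi': 0.1, 'alpha': 1.0,
        'abort_early': True, 'targeted': True, 'adversarial': 'none',
        'temp': 0, 'train': False, 'seed': 1216, 'seed_imagenet': 1,
    })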
        else:
            inputs.append(data.test_data[start + i])
            targets.append(data.test_labels[start + i])

    inputs = np.array(inputs)
    targets = np.array(targets)
    return inputs, targets


if __name__ == "__main__":
    with tf.Session() as sess:
        use_log = True
        # data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
        # data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
        data, model = ImageNet(), InceptionModel(sess, use_log)
        attack = BlackBoxL2(sess, model, batch_size=128,
                            max_iterations=15000, confidence=0, use_log=use_log)
        inputs, targets = generate_data(data, samples=1, targeted=True,
                                        start=6, inception=False)
        inputs = inputs[1:2]
        targets = targets[1:2]
        timestart = time.time()
def main(args):
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], targeted=True, start=0,
            inception=inception, handpick=handpick, seed=args['seed'])
        # print(true_ids)

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'], confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L0A':
            attack = ADMML0(sess, model, batch_size=args['batch_size'],
                            max_iterations=args['maxiter'], confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'], ro=args['ro'],
                            abort_early=args['abort_early'])

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['attack'] != 'L0A' and args['attack'] != 'L0AE'
                and args['attack'] != 'L0C' and args['attack'] != 'L0AE2'):
            l1_l2_li_computation(args, data, model, adv, inception,
                                 inputs, targets, labels, true_ids)
        else:
            l0_computation(args, data, model, adv, inception,
                           inputs, targets, labels, true_ids)
def main(args):
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data_ST(
            data, model, samples=args['numimg'], samplesT=args['numimgT'],
            targeted=True, start=0, inception=inception, handpick=handpick,
            seed=args['seed'])
        # print(true_ids)

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'], confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L2LA2':
            attack = LADMML2re(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               layernum=args['layer_number'],
                               use_kernel=args['use_kernel'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'], abort_early=args['abort_early'])

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['kernel_bias']:
            EP = evaluate_perturbation_kb(args, sess, model, inputs)
            scores, l2 = EP(inputs, targets, adv)
            EPT = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores = EPT(data.test_data, data.test_labels)
            EP2 = evaluate_perturbation_kb_restore(args, sess, model, inputs)
            scores2 = EP2(inputs, targets, adv)
            EPT2 = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores2 = EPT2(data.test_data, data.test_labels)
        else:
            # NOTE: scores/scores2 below are only produced on the kernel_bias path.
            EP = evaluate_perturbation(args, sess, model, inputs)
            # scores = EP(inputs, targets, adv)
            # scores2 = EP2(inputs, targets, adv)

        score_count = []
        score_count2 = []
        score_count3 = []
        score_count4 = []
        for e, sc in enumerate(scores):
            if np.argmax(sc) == np.argmax(targets[e]):
                score_count.append(1)
                if e < args['numimg']:
                    score_count4.append(1)
            else:
                score_count.append(0)
                if e < args['numimg']:
                    score_count4.append(0)
        for e, sc in enumerate(scores):
            if np.argmax(sc) == np.argmax(labels[e]):
                score_count3.append(1)
            else:
                score_count3.append(0)
        for e, sc2 in enumerate(scores2):
            if np.argmax(sc2) == np.argmax(labels[e]):
                score_count2.append(1)
            else:
                score_count2.append(0)

        test_score_count = []
        test_score_count2 = []
        for e, tsc in enumerate(test_scores):
            if np.argmax(tsc) == np.argmax(data.test_labels[e]):
                test_score_count.append(1)
            else:
                test_score_count.append(0)
        for e, tsc2 in enumerate(test_scores2):
            if np.argmax(tsc2) == np.argmax(data.test_labels[e]):
                test_score_count2.append(1)
            else:
                test_score_count2.append(0)

        l0s = np.count_nonzero(adv)
        successrate = np.mean(score_count)
        successrate2 = np.mean(score_count2)
        successrate3 = np.mean(score_count3)
        test_successrate = np.mean(test_score_count)
        test_successrate2 = np.mean(test_score_count2)

        print('original model, success rate of T images for the original labels:', successrate2)
        print('modified model, success rate of T images for the original labels:', successrate3)
        print('modified model, success rate of T images for the target labels:', successrate)
        print('modified model, success rate of S images for the target labels:', np.mean(score_count4))
        print('modified model, success rate of test set for the original labels:', test_successrate)
        print('original model, success rate of test set for the original labels:', test_successrate2)
        print('l0 distance:', l0s)
        print('l2 distance:', l2)
flags.DEFINE_float('alpha', default=0.20, help='Step size')
flags.DEFINE_integer('pop_size', default=6, help='Population size')
flags.DEFINE_integer('max_steps', default=10000, help='Maximum number of iterations')
flags.DEFINE_integer('resize_dim', None, 'Reduced dimension for dimensionality reduction')
flags.DEFINE_bool('adaptive', True, 'Turns on the dynamic scaling of mutation parameters')
flags.DEFINE_string('model', 'inception', 'model name')
flags.DEFINE_integer('target', None, 'target class; if not provided, will be random')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    # random.seed(FLAGS.seed)
    # tf.set_random_seed(FLAGS.seed)
    # np.random.seed(FLAGS.seed)
    dataset = ImageNet(FLAGS.input_dir)
    inputs, targets, reals, paths = utils.generate_data(dataset, FLAGS.test_size)
    with tf.Session() as sess:
        model = InceptionModel(sess, use_log=True)
        test_in = tf.placeholder(tf.float32, (1, 299, 299, 3), 'x')
        test_pred = tf.argmax(model.predict(test_in), axis=1)
        attack = GenAttack2(model=model,
                            pop_size=FLAGS.pop_size,
                            mutation_rate=FLAGS.mutation_rate,
                            eps=FLAGS.eps,
                            max_steps=FLAGS.max_steps,
                            alpha=FLAGS.alpha,
                            resize_dim=FLAGS.resize_dim,
def main(args):
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(args["dataset"]))
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_softmax=True)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(data_path=args["imagenet_dir"],
                                   targetFile=args["attack_single_img"]), \
                          InceptionModel(sess, use_softmax=True)
        elif args['dataset'] == "imagenet_np":
            data, model = ImageNetDataNP(), InceptionModel(sess, use_softmax=True)

        if len(data.test_labels) < args["num_img"]:
            raise Exception("Not enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        if args["attack_single_img"]:
            # manually set up the attack set: attack only one image with a random target
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])
            if args["attack_type"] == "targeted":
                target_labels = [np.eye(model.num_labels)[args["single_img_target_label"]]]
            else:
                target_labels = orig_labels
        else:
            # generate the attack set
            if args["dataset"] == "imagenet" or args["dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False

            if args["random_target"] and (args["dataset"] == "imagenet"
                                          or args["dataset"] == "imagenet_np"):
                # find all possible classes
                all_class = np.unique(np.argmax(data.test_labels, 1))
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = \
                    util.generate_attack_data_set(
                        data, args["num_img"], args["img_offset"], model,
                        attack_type=args["attack_type"],
                        random_target_class=all_class,
                        shift_index=shift_index)
            elif args["random_target"]:
                # random target over all possible classes
                class_num = data.test_labels.shape[1]
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = \
                    util.generate_attack_data_set(
                        data, args["num_img"], args["img_offset"], model,
                        attack_type=args["attack_type"],
                        random_target_class=list(range(class_num)),
                        shift_index=shift_index)
            else:
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = \
                    util.generate_attack_data_set(
                        data, args["num_img"], args["img_offset"], model,
                        attack_type=args["attack_type"],
                        shift_index=shift_index)

        # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(
        #         i, tar_lab, orig_lab, orig_img_id[i]))

        # attack-related settings
        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_rv":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print("Argument img_resize is not set and not using autoencoder, "
                      "set to image original size:{}".format(args["img_resize"]))
        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print("Using zoo or zoo_ae attack, and batch_size is not set.\n"
                      "Set batch_size to {}.".format(args["batch_size"]))
        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
            args["batch_size"] = 1  # force it to be 1

        if args["attack_method"] == "zoo_ae" or args["attack_method"] == "autozoom":
            # _, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size, model.num_channels,
                              args["compress_mode"], use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # set up the attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "zoo_rv":
            blackbox_attack = ZOO_RV(sess, model, args)
        elif args["attack_method"] == "autozoom":
            blackbox_attack = AutoZOOM(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])
        os.system("mkdir -p {}".format(save_prefix))

        total_success = 0
        l2_total = 0

        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]
            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print("[Info][Start]{}: test_index:{}, true label:{}, target label:{}".format(
                i, test_index, true_class, target_class))
            if args["attack_method"] == "zoo_ae" or args["attack_method"] == "autozoom":
                # print autoencoder reconstruction error
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)
            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True
            if success:
                total_success += 1
                l2_total += l2_dist

            print("[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}".format(
                i, test_index, true_class, adv_class, success, l2_dist,
                total_success / (i + 1),
                0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(i, test_index,
                                                            true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)
            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)
            # diff image
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
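# The dimension-reduction idea behind the AutoZOOM setup above: the attack
# optimizes a perturbation of size args["img_resize"] and maps it up to the
# full image before querying the model (via the decoder, or a simple resize
# when no autoencoder is available). A crude nearest-neighbor stand-in for
# that resize step (the actual implementation uses the decoder or bilinear
# resizing; this function name is illustrative):

import numpy as np

def upscale_perturbation(small_delta, out_h, out_w):
    """Upscale a (h, w, c) perturbation to (out_h, out_w, c)."""
    h, w, _ = small_delta.shape
    rows = np.arange(out_h) * h // out_h   # source row for each output row
    cols = np.arange(out_w) * w // out_w   # source column for each output column
    return small_delta[rows][:, cols]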
def main(args):
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        # select dataset and model
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MadryMNISTModel("models/secret/", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            # data, model = CIFAR(), MadryCIFARModel("models/model_0/", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(
                sess, False)
            handpick = True
            inception = True

        # optional adversarially trained / distilled model variants
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = MadryCIFARModel(
                "models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], targeted=True,
            target_num=args['target_number'], start=0, inception=inception,
            handpick=handpick, seed=args['seed'])
        # print(true_ids)

        # instantiate the requested attack
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'LiCW':
            attack = CarliniLi(sess, model, max_iterations=args['maxiter'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L2A':
            attack = ADMML2(sess, model, batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'], abort_early=args['abort_early'])
        if args['attack'] == 'L2AE':
            attack = ADMML2en(sess, model, batch_size=args['batch_size'],
                              max_iterations=args['maxiter'],
                              confidence=args['conf'],
                              binary_search_steps=args['binary_steps'],
                              ro=args['ro'],
                              iteration_steps=args['iteration_steps'],
                              abort_early=args['abort_early'])
        if args['attack'] == 'L2LA':
            attack = LADMML2(sess, model, batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'], abort_early=args['abort_early'])
        if args['attack'] == 'L2LAST':
            attack = LADMMSTL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'], abort_early=args['abort_early'],
                               retrain=args['retrain'])
        if args['attack'] == 'LiIF':
            attack = IFGM(sess, model, batch_size=args['batch_size'],
                          ord=np.inf, inception=inception)
        if args['attack'] == 'LiF':
            attack = FGM(sess, model, batch_size=args['batch_size'],
                         ord=np.inf, inception=inception)
        if args['attack'] == 'L1':
            attack = EADL1(sess, model, batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'], abort_early=args['abort_early'])
        if args['attack'] == 'L1EN':
            attack = EADEN(sess, model, batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'], abort_early=args['abort_early'])
        if args['attack'] == 'L1IFGM':
            attack = IFGM(sess, model, batch_size=args['batch_size'], ord=1,
                          inception=inception)
        if args['attack'] == 'L2IFGM':
            attack = IFGM(sess, model, batch_size=args['batch_size'], ord=2,
                          inception=inception)
        if args['attack'] == 'L1FGM':
            attack = FGM(sess, model, batch_size=args['batch_size'], ord=1,
                         inception=inception)
        if args['attack'] == 'L2FGM':
            attack = FGM(sess, model, batch_size=args['batch_size'], ord=2,
                         inception=inception)

        # craft adversarial examples for the whole batch and report timing
        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # if (args['conf'] != 0):
        #     model = MNISTModel("models/mnist-distilled-100", sess)

        # evaluate L1/L2/Linf distortion statistics on the results
        l1_l2_li_computation(args, data, model, adv, inception, inputs,
                             targets, labels, true_ids)
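# ---------------------------------------------------------------------------
# l1_l2_li_computation is defined elsewhere in this repository. As a rough,
# hypothetical sketch of the average-case bookkeeping such a helper performs
# (the real one also tracks best- and worst-case statistics per batch), with
# the function name and simplifications being ours:
import numpy as np

def average_case_stats(model, adv, inputs, targets):
    """Fraction of adversarial examples hitting their target, plus mean L2."""
    preds = model.model.predict(adv)
    hits = np.argmax(preds, 1) == np.argmax(targets, 1)   # target reached?
    l2 = np.sum((adv - inputs).reshape(len(adv), -1)**2, axis=1)**.5
    return hits.mean(), l2[hits].mean() if hits.any() else 0.0
# ---------------------------------------------------------------------------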
        inputs.append(data.test_data[start + i])
        targets.append(data.test_labels[start + i])

    inputs = np.array(inputs)
    targets = np.array(targets)
    new_inputs = np.array(new_inputs)
    new_targets = np.array(new_targets)
    return inputs, targets, new_inputs, new_targets


if __name__ == "__main__":
    with tf.Session() as sess:
        # data, model = MNIST(), MNISTModel("models/mnist", sess)
        # data, model = CIFAR(), CIFARModel("models/cifar", sess)
        data = ImageNet()
        model = InceptionModel(sess)
        attack = CarliniL2(sess, model, batch_size=1, max_iterations=1000,
                           confidence=0)
        # attack = CarliniL0(sess, model, max_iterations=1000,
        #                    initial_const=10, largest_const=15)
        inputs, targets, new_inputs, new_targets = generate_data(
            data, samples=10, targeted=True, start=0, inception=True)
        """
        #total_attack = []
        #target_all = np.zeros( (10,1008) )
        #D2 = new_inputs[9]
        #D2 = D2 + .5
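# ---------------------------------------------------------------------------
# generate_data (whose tail is shown above) pairs test images with target
# labels for targeted attacks. A minimal hypothetical sketch of how such
# targeted pairs are typically built, assuming one-hot test_labels with
# num_labels classes; the helper name and exact pairing policy are ours:
import numpy as np

def targeted_pairs(test_data, test_labels, samples, num_labels):
    """Pair each of the first `samples` images with every wrong class."""
    inputs, targets = [], []
    for i in range(samples):
        true = np.argmax(test_labels[i])
        for t in range(num_labels):
            if t == true:
                continue  # skip the ground-truth class
            inputs.append(test_data[i])
            targets.append(np.eye(num_labels)[t])
    return np.array(inputs), np.array(targets)
# ---------------------------------------------------------------------------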
def main(args):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model = MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')

        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')

        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than
            # optimizing images jointly
            attack = CarliniL2(sess, model, batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0, learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess, model, batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0, learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log, use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids = generate_data(
            data, samples=args['numimg'], targeted=not args['untargeted'],
            start=args['firstimg'], inception=is_inception)
        print('Done...')
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))

        img_no = 0
        total_success = 0
        l2_total = 0.0
        for i in range(all_true_ids.size):
            inputs = all_inputs[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print("skip wrongly classified image no. {}, original class "
                      "{}, classified as {}".format(i, np.argmax(labels),
                                                    original_class[-1]))
                continue

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5
            adversarial_predict = model.model.predict(adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])
            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(inputs, "{}/{}/{}_original_{}.png".format(
                args['save'], args['dataset'], img_no, suffix))
            show(adv, "{}/{}/{}_adversarial_{}.png".format(
                args['save'], args['dataset'], img_no, suffix))
            show(adv - inputs, "{}/{}/{}_diff_{}.png".format(
                args['save'], args['dataset'], img_no, suffix))
            print("[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, "
                  "success = {}, const = {:.6f}, prev_class = {}, "
                  "new_class = {}, distortion = {:.5f}, "
                  "success_rate = {:.3f}, l2_avg = {:.5f}".format(
                      img_no, i, all_true_ids[i], timeend - timestart,
                      success, const, original_class[-1],
                      adversarial_class[-1], l2_distortion,
                      total_success / float(img_no),
                      0 if total_success == 0 else l2_total / total_success))
            with open(args['save'] + "/report.txt", 'a') as f:
                f.write("*" * 20)
                to_write = ("[STATS][L1] total = {}, seq = {}, id = {}, "
                            "time = {:.3f}, success = {}, const = {:.6f}, "
                            "prev_class = {}, new_class = {}, "
                            "distortion = {:.5f}, success_rate = {:.3f}, "
                            "l2_avg = {:.5f}").format(
                                img_no, i, all_true_ids[i],
                                timeend - timestart, success, const,
                                original_class[-1], adversarial_class[-1],
                                l2_distortion, total_success / float(img_no),
                                0 if total_success == 0
                                else l2_total / total_success)
                f.write(to_write)
                f.write("*" * 20)
                f.write("\n\n")
            sys.stdout.flush()
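# ---------------------------------------------------------------------------
# The success criterion above differs by attack mode: untargeted attacks
# succeed when the top-1 class changes, while targeted attacks succeed only
# when the top-1 class equals the chosen target, and either way distortions
# beyond the L2 budget are counted as failures. A condensed sketch of that
# check as a standalone function; the helper name and signature are ours:
import numpy as np

def attack_succeeded(adv_logits, orig_class, target_onehot, untargeted,
                     l2_distortion, max_l2=20.0):
    """Success test: class flip (or target hit) within an L2 budget."""
    adv_class = np.argsort(np.squeeze(adv_logits))[-1]  # top-1 prediction
    if untargeted:
        ok = adv_class != orig_class
    else:
        ok = adv_class == np.argmax(target_onehot)
    return ok and l2_distortion <= max_l2
# ---------------------------------------------------------------------------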