def function_evaluation_uncons(x, kappa, target_label, const, model, orig_img,
                               arg_targeted_attack):
    """Evaluate the attack objective for the tanh-reparameterised variable.

    ``x`` is the unconstrained (real-valued) optimisation variable; it is
    mapped to pixel space through ``0.5 * tanh(x) / 0.999999`` and resized to
    ``orig_img.shape`` before being scored by ``util.model_prediction``.

    Args:
        x: Unconstrained variable (array-like accepted by ``np.tanh``).
        kappa: Confidence margin; the attack term is floored at ``-kappa``.
        target_label: Class index. For a targeted attack this is the desired
            (false) label, otherwise it is the true label.
        const: Weight applied to the attack term.
        model: Model object forwarded to ``util.model_prediction``.
        orig_img: Original image array; defines the output shape and is the
            reference for the distortion term.
        arg_targeted_attack: Truthy for a targeted attack.

    Returns:
        Tuple ``(total_loss, distortion_loss)`` where ``distortion_loss`` is
        the squared L2 norm of ``img - orig_img`` and ``total_loss`` adds the
        weighted attack term to it.

    Note:
        Only row 0 of the prediction is read for ``target_label``, while the
        competing probability is the maximum over the whole (masked)
        prediction array.
        # NOTE(review): confirm this is intended when a batch is passed.
    """
    # Back to pixel space (nominally [-0.5, 0.5]) and to the image layout.
    pixels = 0.5 * np.tanh(x) / 0.999999
    candidate = np.resize(pixels, orig_img.shape)

    probs, _, _ = util.model_prediction(model, candidate)

    # Zero out the reference label so the max picks the best *other* class.
    masked = probs.copy()
    masked[0, target_label] = 0

    log_other = np.log(np.amax(masked) + 1e-10)
    log_label = np.log(probs[0, target_label] + 1e-10)

    if arg_targeted_attack:
        # Targeted: push the target label above every other class.
        margin = log_other - log_label
    else:
        # Untargeted: push any other class above the true label.
        margin = log_label - log_other

    attack_loss = const * np.max([margin, -kappa])
    distortion_loss = np.linalg.norm(candidate - orig_img)**2
    return attack_loss + distortion_loss, distortion_loss
def main(args):
    """Run a zeroth-order (ZO) adversarial attack on a batch of one class.

    The perturbation ``delta_adv`` has shape ``(1, d)`` and is added to every
    image of the batch (``w_ori_img_vec + delta_adv``), so a single
    perturbation is optimised for all selected images.

    Keys read from ``args``:
        class_id, target_id, maxiter, init_const, kappa, q, mode,
        save_iteration, dataset, targeted_attack, mini_batch_sz, lr_idx,
        constraint, mu, lr, decay_lr, print_iteration, exp_code.

    Side effects:
        * writes ``original_imgsID<class_id>.npy`` with the selected images;
        * every 1000 iterations writes the current perturbation under
          ``retimgs/``;
        * creates ``Examples/`` when ``save_iteration`` is set;
        * writes a ``.npz`` summary (loss curves, distortions, flags) named
          from the run configuration;
        * prints progress to stdout.

    Raises:
        ValueError: if ``args['dataset']`` is not one of ``'mnist'``,
            ``'cifar10'``, ``'imagenet'``, or if fewer than
            ``args['mini_batch_sz']`` correctly classified images of
            ``class_id`` are available among the first 30 candidates.
    """
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  # class of the natural input images
        target_id = args['target_id']  # target class (targeted attack only)
        arg_max_iter = args['maxiter']  # max number of iterations
        arg_init_const = args['init_const']  # weight of the attack loss
        arg_kappa = args['kappa']  # attack confidence level
        arg_q = args['q']  # number of random direction vectors
        arg_mode = args['mode']  # algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        # Not used below; still read so a missing key fails here as before.
        idx_lr = args["lr_idx"]
        constraint = args["constraint"]

        # Load classifier. For MNIST and CIFAR the pixel range is [-0.5, 0.5].
        if arg_Dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif arg_Dataset == 'cifar10':
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif arg_Dataset == 'imagenet':
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
        else:
            # Previously this only printed and then crashed with a NameError
            # on `data`; fail explicitly instead.
            raise ValueError('Please specify a valid dataset')

        # All test images whose ground-truth label is class_id.
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]

        # Take 30 or fewer images so that arg_bsz of them are likely valid,
        # then drop the ones the model already misclassifies.
        _, orig_class = util.model_prediction_u(model, orig_img[:30])
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            # The original `assert 'no enough valid inputs'` asserted a
            # non-empty string and therefore could never fail.
            raise ValueError(
                'not enough valid inputs: found {} correctly classified '
                'images of class {}, need {}'.format(orig_img.shape[0],
                                                     class_id, arg_bsz))
        orig_img = orig_img[:arg_bsz]
        np.save('original_imgsID' + str(class_id), orig_img)

        true_label = class_id
        # Targeted: drive predictions to target_id. Untargeted: drive them
        # away from the true label.
        target_label = target_id if arg_targeted_attack else true_label

        # Feature dimension of a single image.
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        q = arg_q + 0
        max_iter = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        # Flatten each image to a row vector.
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        # Initialise the optimisation variable.
        if constraint == 'uncons':
            # * 0.999999 keeps arctanh finite when a pixel is exactly +-0.5.
            w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
        else:
            w_ori_img_vec = orig_img_vec.copy()
        w_img_vec = w_ori_img_vec.copy()

        delta_adv = np.zeros((1, d))  # shared adversarial perturbation

        # Best-solution bookkeeping.
        best_distortion = (0.5 * d)**2  # threshold for the best perturbation
        total_loss = np.zeros(max_iter)
        l2s_loss_all = np.zeros(max_iter)
        attack_flag = False
        first_flag = True  # True until the first successful attack is seen

        mu = args["mu"]  # smoothing parameter for ZO gradient estimation
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            # Moment buffers for AdaMM.
            v_init = 1e-7
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))
            m = np.zeros((1, d))
            beta_1 = 0.9
            beta_2 = 0.3
            print(beta_1, beta_2)

        if arg_save_iteration:
            # Created once here instead of shelling out to `mkdir` on every
            # iteration.
            os.makedirs("Examples", exist_ok=True)

        for i in range(max_iter):
            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            # Total loss at the current iterate.
            if constraint == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)
            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            # Gradient estimate w.r.t. w_img_vec.
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(
                    mu, q, w_img_vec, d, kappa, target_label, const, model,
                    orig_img, arg_targeted_attack, constraint)
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(
                    mu, q, w_img_vec, d, kappa, target_label, const, model,
                    orig_img, arg_targeted_attack, constraint)
            else:
                grad_est = gradient_estimation_v2(
                    mu, q, w_img_vec, d, kappa, target_label, const, model,
                    orig_img, arg_targeted_attack, constraint)

            # Perturbation update. An unrecognised mode leaves delta_adv
            # untouched, exactly as the original chain of `if`s did.
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            elif arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            elif arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            elif arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)
                v_hat = np.maximum(v_hat, v)
                # NOTE(review): the step is scaled by sqrt(v) while the
                # projection below uses sqrt(v_hat); confirm whether the
                # step was meant to use v_hat as well. Behaviour is kept.
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if constraint == 'cons':
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(delta_adv.copy(), orig_img_vec,
                                               V_temp, -0.5, 0.5)
            elif arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                if constraint == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            elif arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if constraint == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            elif arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if constraint == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            # Apply the shared perturbation to every image of the batch.
            w_img_vec = w_ori_img_vec + delta_adv

            # Convert back to image space (nominally [-0.5, 0.5]).
            if constraint == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
            else:
                adv_img_vec = w_img_vec.copy()
            adv_img = np.resize(adv_img_vec, orig_img.shape)

            # Per-image probability of the reference label and of the best
            # competing label.
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            pred_label = np.argmax(attack_prob, 1)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM":
                    print(beta_1, beta_2)
                print("save delta_adv")
                # np.save does not create missing parent directories.
                os.makedirs('retimgs', exist_ok=True)
                np.save(
                    'retimgs/' + str(i) + 'itrs' + str(pred_label) +
                    arg_mode + str(args["lr"]), delta_adv)

            if args["print_iteration"] and (i + 1) % 20 == 0:
                if (true_label != pred_label).all():
                    print(
                        "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                        % (i + 1, class_id, args["lr"], int(args["decay_lr"]),
                           arg_mode, constraint, total_loss[i],
                           l2s_loss_all[i], true_label, pred_label))
                else:
                    sr = np.sum(true_label != pred_label) / arg_bsz
                    print(
                        "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                        % (i + 1, class_id, args["lr"], int(args["decay_lr"]),
                           arg_mode, constraint, total_loss[i],
                           l2s_loss_all[i], true_label, pred_label, sr))

            if arg_save_iteration:
                # The original tested `np.logical_or(array, bool)` directly,
                # which raises for batches of more than one image.
                if np.any(true_label != pred_label) or (i + 1) % 10 == 0:
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label, pred_label, i + 1)
                    # Image dump is disabled in the original code:
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            # Confidence margin, oriented so that ">= kappa" means success.
            log_target = np.log(target_prob + 1e-10)
            log_other = np.log(other_prob + 1e-10)
            if arg_targeted_attack:
                margin = log_target - log_other
            else:
                margin = log_other - log_target

            # Success requires every image of the batch to clear the margin.
            if (margin >= kappa).all():
                if distortion(adv_img, orig_img) < best_distortion:
                    best_distortion = distortion(adv_img, orig_img)
                    best_iteration = i + 1
                    attack_flag = True
                    if first_flag:
                        # Recorded once; later successes do not overwrite it.
                        first_flag = False
                        first_distortion = distortion(adv_img, orig_img)
                        first_iteration = i + 1

        # Persist the run summary.
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id, arg_mode, constraint, str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        if attack_flag:
            # The key `best_iteation` is misspelled in the original file
            # format and is kept so existing readers keep working.
            np.savez(suffix0,
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
        else:
            np.savez(suffix0,
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")
        sys.stdout.flush()