def load_model_and_dataset(dataset):
    """Return a ``(model, data)`` pair for the requested dataset name.

    For 'mnist' and 'cifar10' a NiN checkpoint is restored into the
    module-level TF session ``sess``; 'mnist2' builds the plain Keras-style
    MNIST model without restoring a checkpoint here.

    Raises:
        ValueError: if ``dataset`` is not one of the known names.
    """
    if dataset == 'mnist':
        import mnist_NiN_bn
        model = mnist_NiN_bn.NiN_Model()
        # NOTE(review): machine-specific absolute path — consider making it configurable.
        ckpt_dir = '/home/bull/home/zmn/insight/sparse-imperceivable-attacks-master/models/mnist_NiN/'
        restorer = tf.train.Saver()
        restorer.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        data = MNIST()
    elif dataset == "mnist2":
        import mnist_model
        model = mnist_model.MNISTModel()
        data = MNIST()
    elif dataset == 'cifar10':
        import cifar_NiN_bn
        model = cifar_NiN_bn.NiN_Model()
        ckpt_dir = '/home/bull/home/zmn/insight/sparse-imperceivable-attacks-master/models/cifar_NiN/'
        restorer = tf.train.Saver()
        restorer.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        data = CIFAR()
    else:
        raise ValueError('unknown dataset')
    return model, data
def main():
    """Run a zeroth-order attack (ZOSVRG or ZOSGD) on a batch of MNIST images.

    All configuration is read from the module-level manager ``MGR``.  A single
    universal perturbation ``delImgAT`` is optimized in arctanh space, applied
    to every selected image, and the resulting adversarial images plus the
    perturbation itself are saved under ``MGR.parSet['save_path']``.

    Raises:
        ValueError: for an unknown optimizer name.  (The original code only
        printed a message and then crashed later with an undefined
        ``delImgAT`` NameError.)
    """
    data, model = MNIST(), MNISTModel(restore="models/mnist", use_log=True)
    origImgs, origLabels, origImgID = util.generate_attack_data_set(data, model, MGR)

    # Universal perturbation shared by all images, optimized by the ZO method.
    delImgAT_Init = np.zeros(origImgs[0].shape)
    objfunc = ObjectiveFunc.OBJFUNC(MGR, model, origImgs, origLabels)
    MGR.Add_Parameter('eta', MGR.parSet['alpha'] / origImgs[0].size)
    MGR.Log_MetaData()

    if MGR.parSet['optimizer'] == 'ZOSVRG':
        delImgAT = svrg.ZOSVRG(delImgAT_Init, MGR, objfunc)
    elif MGR.parSet['optimizer'] == 'ZOSGD':
        delImgAT = sgd.ZOSGD(delImgAT_Init, MGR, objfunc)
    else:
        # Fail fast instead of continuing with an undefined perturbation.
        raise ValueError('Please specify a valid optimizer')

    for idx_ImgID in range(MGR.parSet['nFunc']):
        currentID = origImgID[idx_ImgID]
        orig_prob = model.model.predict(np.expand_dims(origImgs[idx_ImgID], axis=0))
        # tanh re-parameterization keeps the adversarial image inside the valid
        # box; *1.9999999 avoids arctanh(+-1) = +-inf at the boundary.
        advImg = np.tanh(np.arctanh(origImgs[idx_ImgID] * 1.9999999) + delImgAT) / 2.0
        adv_prob = model.model.predict(np.expand_dims(advImg, axis=0))
        suffix = "id{}_Orig{}_Adv{}".format(currentID, np.argmax(orig_prob),
                                            np.argmax(adv_prob))
        util.save_img(advImg, "{}/Adv_{}.png".format(MGR.parSet['save_path'], suffix))

    util.save_img(np.tanh(delImgAT) / 2.0,
                  "{}/Delta.png".format(MGR.parSet['save_path']))
    sys.stdout.flush()
    MGR.logHandler.close()
def main(_):
    """Craft FGSM adversarial examples for MNIST or Cifar, save and evaluate.

    Fix: ``FLAGS.datset`` (typo, twice) -> ``FLAGS.dataset``; the Cifar branch
    previously raised AttributeError on the misspelled flag.
    """
    with tf.Session() as sess:
        K.set_session(sess)

        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
        elif FLAGS.dataset == 'Cifar':
            data, model = CIFAR(), CIFARModel("models/cifar", sess)

        def _model_fn(x, logits=False):
            # Adapter: the fgsm helper may ask for probabilities or raw logits.
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        # Dataset-specific attack budget (epochs = FGSM iterations, eps = step).
        if FLAGS.dataset == 'MNIST':
            x_adv = fgsm(_model_fn, x, epochs=9, eps=0.02)
        elif FLAGS.dataset == 'Cifar':
            x_adv = fgsm(_model_fn, x, epochs=4, eps=0.01)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        np.save('adversarial_outputs/fgsm_train_' + FLAGS.dataset.lower() + '.npy',
                X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + FLAGS.dataset.lower() + '.npy',
                X_adv_test)

        # Accuracy on clean vs. adversarial data for both splits.
        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)
        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
def compare_baseline():
    """Run an untargeted Carlini-Wagner L2 attack on 100 MNIST test images
    and report the mean L2 norm of the resulting perturbations."""
    data = MNIST()
    model = MNISTModel("models/mnist")
    sess = K.get_session()
    attack = CarliniL2(sess, model,
                       batch_size=100, max_iterations=3000,
                       binary_search_steps=4, targeted=False,
                       initial_const=10)
    N = 100
    test_adv = attack.attack(data.test_data[:N], data.test_labels[:N])
    # Per-image L2 distortion, averaged over the attacked batch.
    perturbation = test_adv - data.test_data[:N]
    l2_per_image = np.sum(perturbation ** 2, axis=(1, 2, 3)) ** .5
    print('dist', np.mean(l2_per_image))
def reconstruct(n_samples):
    """Pick one random test digit among the first ``n_samples``, run it through
    the trained autoencoder, and plot original vs. reconstruction."""
    encoder = load_model('{}_encoder.h5'.format(desc))
    decoder = load_model('{}_decoder.h5'.format(desc))

    x_test = MNIST().test_data
    idx = np.random.choice(np.arange(n_samples))
    original = x_test[idx].reshape(1, 784)

    reconstruction = decoder.predict(encoder.predict(original))

    draw([{"title": "Original", "image": original},
          {"title": "Reconstruction", "image": reconstruction}])
def main(args):
    """Train the baseline MNIST and/or CIFAR classifiers chosen by args['dataset']."""
    if not os.path.isdir('models'):
        os.makedirs('models')

    wanted = args['dataset']
    if wanted in ("mnist", "all"):
        train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50)
    if wanted in ("cifar", "all"):
        train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50)
def cw_attack(file_name, norm, sess, num_image=10, cifar=False, tinyimagenet=False):
    """Run a CW-family attack (EAD-L1 / CW-L2 / CW-Linf) against a saved model.

    Args:
        file_name: path of the Keras model to load.
        norm: '1', '2' or 'i' selecting the attack and the distortion norm.
        sess: TF session the attack runs in.
        num_image: number of test images to attack.
        cifar / tinyimagenet: dataset selectors (MNIST when both are False).

    Returns:
        (UB, secs): average distortion upper bound and seconds per image.

    Raises:
        ValueError: for an unknown ``norm`` (previously fell through and died
        later with a confusing NameError).
    """
    # Fixed seeds so sample selection and attack randomness are reproducible.
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)

    # Pick the attack class and the matching per-image distortion norm.
    if norm == '1':
        attack_cls = EADL1
        norm_fn = lambda x: np.sum(np.abs(x), axis=(1, 2, 3))
    elif norm == '2':
        attack_cls = CarliniL2
        norm_fn = lambda x: np.sum(x**2, axis=(1, 2, 3))
    elif norm == 'i':
        attack_cls = CarliniLi
        norm_fn = lambda x: np.max(np.abs(x), axis=(1, 2, 3))
    else:
        raise ValueError("unknown norm: {} (expected '1', '2' or 'i')".format(norm))

    if cifar:
        data = CIFAR()
    elif tinyimagenet:
        data = tinyImagenet()
    else:
        data = MNIST()

    model = load_model(file_name,
                       custom_objects={'fn': loss, 'tf': tf,
                                       'ResidualStart': ResidualStart,
                                       'ResidualStart2': ResidualStart2})
    inputs, targets, true_labels, true_ids, img_info = generate_data(
        data, samples=num_image, targeted=True,
        random_and_least_likely=True, target_type=0b0010,
        predictor=model.predict, start=0)

    # The attack classes expect a wrapper object exposing predict / num_labels /
    # image_size / num_channels; the Keras model is patched in place to serve
    # as its own wrapper (model.predict = model makes calls go through __call__).
    model.predict = model
    model.num_labels = 10
    if cifar:
        model.image_size = 32
        model.num_channels = 3
    elif tinyimagenet:
        model.image_size = 64
        model.num_channels = 3
        model.num_labels = 200
    else:
        model.image_size = 28
        model.num_channels = 1

    start_time = timer.time()
    attack = attack_cls(sess, model, max_iterations=1000)
    perturbed_input = attack.attack(inputs, targets)
    UB = np.average(norm_fn(perturbed_input - inputs))
    return UB, (timer.time() - start_time) / len(inputs)
def main(args):
    """Train an autoencoder (CODEC) on the dataset selected by args['dataset'].

    Fix: the alternate-source labels were loaded from "{}_data.npy" — the same
    file as the images — instead of "{}_labels.npy".
    """
    # load data
    print("Loading data", args["dataset"])
    if args["dataset"] == "mnist":
        data = MNIST()
        if args["train_data_source"]:
            print("Using data from {}".format(args["train_data_source"]))
            img = np.load("{}_data.npy".format(args["train_data_source"]))
            # BUG FIX: previously re-loaded the *_data.npy file as labels.
            labels = np.load("{}_labels.npy".format(args["train_data_source"]))
            data.validation_data = img
            data.validation_labels = labels
    elif args["dataset"] == "cifar10":
        data = CIFAR()
    elif args["dataset"] == "fe":
        data = FACIAL()
    elif args["dataset"] == "imagenet":
        # use ImageDataGenerator provided by Keras (streamed, not in-memory)
        data = ImageNetDataGen(args["imagenet_train_dir"],
                               args["imagenet_validation_dir"],
                               data_augmentation=False)
    print("Done...")

    # ImageNet has no in-memory train array, so its shape is hard-coded.
    if args["dataset"] == "imagenet":
        data_shape = (None, 299, 299, 3)
        resize = 256
    else:
        data_shape = data.train_data.shape
        resize = None

    print("Start training autoencoder")
    codec = CODEC(img_size=data_shape[1], num_channels=data_shape[3],
                  compress_mode=args["compress_mode"], resize=resize)
    train_autoencoder(data, codec, batch_size=args["batch_size"],
                      epochs=args["epochs"],
                      saveFilePrefix=args["save_prefix"],
                      train_imagenet=(args["dataset"] == "imagenet"))
def plot(n_samples):
    """Scatter-plot the encoder's latent space for the first n_samples test digits.

    Fix: the scatter masks previously indexed ``latent`` (n_samples rows) with
    the *full* ``y_test``, which breaks or mislabels whenever
    n_samples != len(y_test); the sliced ``y`` (computed but unused before) is
    now used.
    """
    encoder = load_model('{}_encoder.h5'.format(desc))
    data = MNIST()
    x = data.test_data[:n_samples].reshape(n_samples, 784)
    # assumes labels are integer class ids (not one-hot) — TODO confirm
    y = data.test_labels[:n_samples]

    latent = encoder.predict(x)
    if FLAGS.latent_dim > 2:
        # Project high-dimensional latents down to 2-D for plotting.
        tsne = TSNE()
        print("\nFitting t-SNE, this will take awhile...")
        latent = tsne.fit_transform(latent)

    fig, ax = plt.subplots()
    for label in np.arange(10):
        mask = (y == label)
        ax.scatter(latent[mask, 0], latent[mask, 1], label=label, s=3)
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    ax.set_aspect('equal')
    ax.set_title("Latent Space")
    plt.show(block=False)
    # NOTE(review): raw_input is Python 2 only; under Python 3 this must be input().
    raw_input("Press Enter to Exit")
def convert(file_name, new_name, cifar=False):
    """Convert a trained CNN into an equivalent MLP, save it, and log accuracy.

    Extracts flattened equivalent weights from the CNN at ``file_name``,
    rebuilds the network as Flatten -> (Dense + ReLU)* -> Dense(10), copies
    the weights in layer by layer, saves the result to ``new_name``, and
    returns the validation accuracy.
    """
    # get_weights returns both the converted weight tensors and the hidden sizes.
    if not cifar:
        eq_weights, new_params = get_weights(file_name)
        data = MNIST()
    else:
        eq_weights, new_params = get_weights(file_name, inp_shape=(32, 32, 3))
        data = CIFAR()
    model = Sequential()
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    for param in new_params:
        model.add(Dense(param))
        # ReLU as a Lambda layer keeps layer indices aligned with eq_weights.
        model.add(Lambda(lambda x: tf.nn.relu(x)))
    model.add(Dense(10))
    # Copy the converted weights; entries without a printable shape just skip
    # the debug print (the bare except only guards that print).
    for i in range(len(eq_weights)):
        try:
            print(eq_weights[i][0].shape)
        except:
            pass
        model.layers[i].set_weights(eq_weights[i])
    sgd = SGD(lr=0.01, decay=1e-5, momentum=0.9, nesterov=True)
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])
    model.save(new_name)
    acc = model.evaluate(data.validation_data, data.validation_labels)[1]
    printlog("Converting CNN to MLP")
    # The source file name encodes the architecture: ..._<n>layer_<filters>_<kernel>
    nlayer = file_name.split('_')[-3][0]
    filters = file_name.split('_')[-2]
    kernel_size = file_name.split('_')[-1]
    printlog(
        "model name = {0}, numlayer = {1}, filters = {2}, kernel size = {3}".
        format(file_name, nlayer, filters, kernel_size))
    printlog("Model accuracy: {:.3f}".format(acc))
    printlog("-----------------------------------")
    return acc
def main(args):
    """Contrastive-explanation (AEADEN/CEM) attack on one MNIST test image.

    Generates a perturbation for image ``args['img_id']`` regularized by a
    pretrained autoencoder, then saves the original, adversarial, and delta
    images under Results/<mode>_ID<id>_Gamma_<gamma>/.
    """
    with tf.Session() as sess:
        random.seed(121)
        np.random.seed(1211)

        image_id = args['img_id']
        arg_max_iter = args['maxiter']
        arg_b = args['binary_steps']
        arg_init_const = args['init_const']
        arg_mode = args['mode']
        arg_kappa = args['kappa']
        arg_beta = args['beta']
        arg_gamma = args['gamma']

        # Autoencoder used to keep perturbations close to the data manifold.
        AE_model = util.load_AE("mnist_AE_1")
        data, model = MNIST(), MNISTModel("models/mnist", sess, False)

        orig_prob, orig_class, orig_prob_str = util.model_prediction(
            model, np.expand_dims(data.test_data[image_id], axis=0))
        # The "target" passed on is the model's own prediction for the image.
        target_label = orig_class
        print("Image:{}, infer label:{}".format(image_id, target_label))
        orig_img, target = util.generate_data(data, image_id, target_label)

        attack = AEADEN(sess, model, mode=arg_mode, AE=AE_model, batch_size=1,
                        kappa=arg_kappa, init_learning_rate=1e-2,
                        binary_search_steps=arg_b, max_iterations=arg_max_iter,
                        initial_const=arg_init_const, beta=arg_beta,
                        gamma=arg_gamma)

        adv_img = attack.attack(orig_img, target)

        # Classify the adversarial image and the pure perturbation (delta).
        adv_prob, adv_class, adv_prob_str = util.model_prediction(model, adv_img)
        delta_prob, delta_class, delta_prob_str = util.model_prediction(
            model, orig_img - adv_img)

        INFO = "[INFO]id:{}, kappa:{}, Orig class:{}, Adv class:{}, Delta class: {}, Orig prob:{}, Adv prob:{}, Delta prob:{}".format(
            image_id, arg_kappa, orig_class, adv_class, delta_class,
            orig_prob_str, adv_prob_str, delta_prob_str)
        print(INFO)

        suffix = "id{}_kappa{}_Orig{}_Adv{}_Delta{}".format(
            image_id, arg_kappa, orig_class, adv_class, delta_class)
        arg_save_dir = "{}_ID{}_Gamma_{}".format(arg_mode, image_id, arg_gamma)
        os.system("mkdir -p Results/{}".format(arg_save_dir))
        util.save_img(
            orig_img,
            "Results/{}/Orig_original{}.png".format(arg_save_dir, orig_class))
        util.save_img(adv_img,
                      "Results/{}/Adv_{}.png".format(arg_save_dir, suffix))
        # Shift by -0.5 so the absolute delta renders in the same [-0.5, 0.5] range.
        util.save_img(
            np.absolute(orig_img - adv_img) - 0.5,
            "Results/{}/Delta_{}.png".format(arg_save_dir, suffix))

        sys.stdout.flush()
    # --- tail of a train() routine; its `def` header lies outside this chunk ---
    print("training")
    print('=================')
    # run training with given dataset, and print progress
    # NOTE(review): validation_data reuses the training set, so the reported
    # val metrics track training fit, not generalization — confirm intent.
    history = model.fit(inputs, labels,
                        batch_size=batch_size,
                        validation_data=(inputs, labels),
                        epochs=num_epochs,
                        shuffle=True)

    # # save model to a file
    # if file_name != None:
    #     model.save(file_name)
    print('=================')
    print('finished training')
    print('==================')
    # `nlayer_model` is presumably defined earlier in the (unseen) function body.
    return {'model': nlayer_model, 'history': None}

if not os.path.isdir('models'):
    os.makedirs('models')

if __name__ == '__main__':
    print(MNIST().train_data.shape[1:])
    # Train a 5-layer (4 hidden layers of 20 units) ReLU MLP on MNIST.
    train(MNIST(), file_name="models/mnist_5layer_relu", params=[20, 20, 20, 20],
          num_epochs=50, lr=0.02, decay=1e-4)
def main(args):
    """Zeroth-order black-box attack driver (ZO-SGD/signSGD/SCD/AdaMM/SMD/PSGD/NES).

    For each correctly-classified test image, runs the ZO optimizer selected
    by args['mode'] for args['maxiter'] iterations, tracking both the first
    and the best (lowest-distortion) successful adversarial example, and saves
    per-image loss/distortion traces as .npz files under retperimage2/.
    """
    with tf.Session() as sess:
        # Fixed seeds so image selection and random directions are reproducible.
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)
        # Draw 3x as many candidate ids as needed; misclassified ones get skipped.
        image_id_set = np.random.choice(range(1000), args["image_number"] * 3,
                                        replace=False)
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args['init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5, 0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        # ii walks the candidate ids; iii counts images actually attacked.
        succ_count, ii, iii = 0, 0, 0
        final_distortion_count, first_iteration_count, first_distortion_count = [], [], []
        while iii < args["image_number"]:
            ii = ii + 1
            image_id = image_id_set[ii]

            # if image_id!= 836: continue # for test only

            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))
            ## orig_class: predicted label

            if arg_targeted_attack:  ### targeted attack aims at the next class (mod 10)
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]
            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]
            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print("True Label is different from the original prediction, pass!")
                continue
            else:
                iii = iii + 1
                print('\n', iii, '/', args["image_number"])

                ## parameter
                d = orig_img.size  # feature dim
                print("dimension = ", d)

                # mu=1/d**2  # smoothing parameter
                q = arg_q + 0
                I = arg_max_iter + 0
                kappa = arg_kappa + 0
                const = arg_init_const + 0

                ## flatten image to vec
                orig_img_vec = np.resize(orig_img, (1, d))
                delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

                ## w adv image initialization
                if args["constraint"] == 'uncons':
                    # * 0.999999 to avoid +-0.5 return +-infinity
                    w_ori_img_vec = np.arctanh(
                        2 * (orig_img_vec) * 0.999999
                    )  # in real value; note that orig_img_vec is in [-0.5, 0.5]
                    w_img_vec = np.arctanh(
                        2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                        0.999999)
                else:
                    w_ori_img_vec = orig_img_vec.copy()
                    w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

                # initialize the best solution & best loss
                best_adv_img = []  # successful adv image in [-0.5, 0.5]
                best_delta = []  # best perturbation
                best_distortion = (0.5 * d)**2  # threshold for best perturbation
                total_loss = np.zeros(I)  ## I: max iters
                l2s_loss_all = np.zeros(I)
                attack_flag = False
                first_flag = True  ## record first successful attack

                # parameter setting for ZO gradient estimation
                mu = args["mu"]  ### smoothing parameter

                ## learning rate
                base_lr = args["lr"]

                if arg_mode == "ZOAdaMM":
                    ## parameter initialization for AdaMM
                    v_init = 1e-7  #0.00001
                    v_hat = v_init * np.ones((1, d))
                    v = v_init * np.ones((1, d))
                    m = np.zeros((1, d))
                    # momentum parameters for first and second order moment
                    beta_1 = 0.9
                    beta_2 = 0.9  # only used by AMSGrad
                    print(beta_1, beta_2)

                for i in range(I):
                    if args["decay_lr"]:
                        base_lr = args["lr"] / np.sqrt(i + 1)

                    ## Total loss evaluation
                    if args["constraint"] == 'uncons':
                        total_loss[i], l2s_loss_all[
                            i] = function_evaluation_uncons(
                                w_img_vec, kappa, target_label, const, model,
                                orig_img, arg_targeted_attack)
                    else:
                        total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                            w_img_vec, kappa, target_label, const, model,
                            orig_img, arg_targeted_attack)

                    ## gradient estimation w.r.t. w_img_vec
                    if arg_mode == "ZOSCD":
                        grad_est = grad_coord_estimation(
                            mu, q, w_img_vec, d, kappa, target_label, const,
                            model, orig_img, arg_targeted_attack,
                            args["constraint"])
                    elif arg_mode == "ZONES":
                        grad_est = gradient_estimation_NES(
                            mu, q, w_img_vec, d, kappa, target_label, const,
                            model, orig_img, arg_targeted_attack,
                            args["constraint"])
                    else:
                        grad_est = gradient_estimation_v2(
                            mu, q, w_img_vec, d, kappa, target_label, const,
                            model, orig_img, arg_targeted_attack,
                            args["constraint"])

                    ## ZO-Attack: per-mode update of the perturbation delta_adv
                    if arg_mode == "ZOSGD":
                        delta_adv = delta_adv - base_lr * grad_est
                    if arg_mode == "ZOsignSGD":
                        delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if arg_mode == "ZOSCD":
                        delta_adv = delta_adv - base_lr * grad_est
                    if arg_mode == "ZOAdaMM":
                        # AMSGrad-style update: element-wise max keeps v_hat monotone.
                        m = beta_1 * m + (1 - beta_1) * grad_est
                        v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                        v_hat = np.maximum(v_hat, v)
                        delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                        if args["constraint"] == 'cons':
                            tmp = delta_adv.copy()
                            # projection under the metric sqrt(v_hat)
                            V_temp = np.sqrt(v_hat.reshape(1, -1))
                            delta_adv = projection_box(tmp, orig_img_vec,
                                                       V_temp, -0.5, 0.5)
                    if arg_mode == "ZOSMD":
                        delta_adv = delta_adv - 0.5 * base_lr * grad_est
                        if args["constraint"] == 'cons':
                            V_temp = np.ones_like(orig_img_vec)
                            delta_adv = projection_box(delta_adv, orig_img_vec,
                                                       V_temp, -0.5, 0.5)
                    if arg_mode == "ZOPSGD":
                        delta_adv = delta_adv - base_lr * grad_est
                        if args["constraint"] == 'cons':
                            V_temp = np.ones_like(orig_img_vec)
                            delta_adv = projection_box(delta_adv, orig_img_vec,
                                                       V_temp, -0.5, 0.5)
                    if arg_mode == "ZONES":
                        delta_adv = delta_adv - base_lr * np.sign(grad_est)
                        if args["constraint"] == 'cons':
                            V_temp = np.ones_like(orig_img_vec)
                            delta_adv = projection_box(delta_adv, orig_img_vec,
                                                       V_temp, -0.5, 0.5)

                    ### adv. example update
                    w_img_vec = w_ori_img_vec + delta_adv

                    ## convert back to adv_img in [-0.5, 0.5]
                    if args["constraint"] == 'uncons':
                        adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
                    else:
                        adv_img_vec = w_img_vec.copy()

                    adv_img = np.resize(adv_img_vec, orig_img.shape)

                    ## update the best solution in the iterations
                    attack_prob, _, _ = util.model_prediction(model, adv_img)
                    target_prob = attack_prob[0, target_label]
                    attack_prob_tmp = attack_prob.copy()
                    attack_prob_tmp[0, target_label] = 0
                    other_prob = np.amax(attack_prob_tmp)

                    if args["print_iteration"]:
                        if np.remainder(i + 1, 1) == 0:
                            if true_label != np.argmax(attack_prob):
                                print(
                                    "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                    % (i + 1, image_id, args["lr"],
                                       int(args["decay_lr"]), arg_mode,
                                       args["constraint"], total_loss[i],
                                       l2s_loss_all[i], true_label,
                                       np.argmax(attack_prob)))
                            else:
                                print(
                                    "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                    % (i + 1, image_id, args["lr"],
                                       int(args["decay_lr"]), arg_mode,
                                       args["constraint"], total_loss[i],
                                       l2s_loss_all[i], true_label,
                                       np.argmax(attack_prob)))

                    if arg_save_iteration:
                        os.system("mkdir Examples")
                        if (np.logical_or(
                                true_label != np.argmax(attack_prob),
                                np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                            suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                                image_id, arg_mode, true_label,
                                np.argmax(attack_prob), i + 1)
                            # util.save_img(adv_img, "Examples/{}.png".format(suffix))

                    if arg_targeted_attack:
                        # success once the log-prob margin exceeds kappa
                        if (np.log(target_prob + 1e-10) -
                                np.log(other_prob + 1e-10) >=
                                kappa):  # check attack confidence
                            if (distortion(adv_img, orig_img) <
                                    best_distortion):  # check distortion
                                best_adv_img = adv_img
                                best_distortion = distortion(adv_img, orig_img)
                                best_delta = adv_img - orig_img
                                best_iteration = i + 1
                                adv_class = np.argmax(attack_prob)
                                attack_flag = True
                                ## Record first attack
                                if (first_flag):
                                    first_flag = False  ### later successes are no longer "first"
                                    first_adv_img = adv_img
                                    first_distortion = distortion(adv_img, orig_img)
                                    first_delta = adv_img - orig_img
                                    first_class = adv_class
                                    first_iteration = i + 1
                    else:
                        if (np.log(other_prob + 1e-10) -
                                np.log(target_prob + 1e-10) >=
                                kappa):  # check attack confidence
                            if (distortion(adv_img, orig_img) <
                                    best_distortion):  # check distortion
                                best_adv_img = adv_img
                                best_distortion = distortion(adv_img, orig_img)
                                best_delta = adv_img - orig_img
                                best_iteration = i + 1
                                adv_class = np.argmax(attack_prob)
                                attack_flag = True
                                ## Record first attack
                                if (first_flag):
                                    first_flag = False
                                    first_adv_img = adv_img
                                    first_distortion = distortion(adv_img, orig_img)
                                    first_delta = adv_img - orig_img
                                    first_class = adv_class
                                    first_iteration = i + 1

                if (attack_flag):
                    ## save data
                    succ_count = succ_count + 1
                    final_distortion_count.append(l2s_loss_all[-1])
                    first_distortion_count.append(first_distortion)
                    first_iteration_count.append(first_iteration)
                    suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                        image_id, arg_mode, args["constraint"], args["lr"],
                        int(args["decay_lr"]), args["exp_code"])
                    # NOTE: the 'best_iteation' key name (typo) is kept for
                    # compatibility with downstream readers of these .npz files.
                    np.savez("{}".format(suffix0),
                             id=image_id,
                             mode=arg_mode,
                             loss=total_loss,
                             perturbation=l2s_loss_all,
                             best_distortion=best_distortion,
                             first_distortion=first_distortion,
                             first_iteration=first_iteration,
                             best_iteation=best_iteration,
                             learn_rate=args["lr"],
                             decay_lr=args["decay_lr"],
                             attack_flag=attack_flag)
                    ## print
                    print("It takes {} iteations to find the first attack".format(
                        first_iteration))
                else:
                    ## save data
                    suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                        image_id, arg_mode, args["constraint"], args["lr"],
                        int(args["decay_lr"]), args["exp_code"])
                    np.savez("{}".format(suffix0),
                             id=image_id,
                             mode=arg_mode,
                             loss=total_loss,
                             perturbation=l2s_loss_all,
                             best_distortion=best_distortion,
                             learn_rate=args["lr"],
                             decay_lr=args["decay_lr"],
                             attack_flag=attack_flag)
                    print("Attack Fails")

                sys.stdout.flush()

        # Aggregate statistics over all attacked images.
        print('succ rate:', succ_count / args["image_number"])
        print('average first success l2', np.mean(first_distortion_count))
        print('average first itrs', np.mean(first_iteration_count))
        print('average l2:', np.mean(final_distortion_count),
              ' best l2:', np.min(final_distortion_count),
              ' worst l2:', np.max(final_distortion_count))
## contained in the LICENCE file in this directory. from setup_mnist import MNIST from mn_utils import prepare_data from worker import AEDetector, SimpleReformer, IdReformer, AttackData, Classifier, Operator, Evaluator import mn_utils as utils detector_I = AEDetector("./defensive_models/MNIST_I", p=2) detector_II = AEDetector("./defensive_models/MNIST_II", p=1) reformer = SimpleReformer("./defensive_models/MNIST_I") id_reformer = IdReformer() classifier = Classifier("./models/example_classifier") detector_dict = dict() detector_dict["I"] = detector_I detector_dict["II"] = detector_II operator = Operator(MNIST(), classifier, detector_dict, reformer) idx = utils.load_obj("example_idx") _, _, Y = prepare_data(MNIST(), idx) f = "example_carlini_0.0" testAttack = AttackData(f, Y, "Carlini L2 0.0") evaluator = Evaluator(operator, testAttack) evaluator.plot_various_confidences("defense_performance", drop_rate={"I": 0.001, "II": 0.001})
# run training with given dataset, and print progress model.fit(data.train_data, data.train_labels, batch_size=batch_size, validation_data=(data.validation_data, data.validation_labels), epochs=num_epochs, shuffle=True) # save model to a file if file_name != None: model.save(file_name) return model if not os.path.isdir('models'): os.makedirs('models') if __name__ == "__main__": import argparse ap = argparse.ArgumentParser() ap.add_argument('-d', '--dataset', type=str, default="mnist") args = vars(ap.parse_args()) if "mnist" in args["dataset"]: MNIST() if "cifar" in args["dataset"]: CIFAR() #train(MNIST(), file_name="models/mnist_2layer", params=[1024], num_epochs=1, lr=0.1, decay=1e-3) #train(CIFAR(), file_name="models/cifar_2layer", params=[1024], num_epochs=1, lr=0.2, decay=1e-3)
        # --- tail of a target-label helper; its loop and `def` header lie outside this chunk ---
        # NOTE(review): np.random.random_integers is deprecated;
        # np.random.randint(0, 10) is the modern equivalent.
        r = np.random.random_integers(0, 9)
        l[i, r] = 1
    return l


def attack(data, name):
    """Run a targeted Carlini-Wagner L2 attack against models/<name>, save the
    adversarial batch to /tmp/<name>.npy, and print the mean L2 distortion."""
    sess = K.get_session()
    model = load_model("models/" + name, custom_objects={'fn': fn})

    class Wrap:
        # Minimal wrapper exposing the attributes CarliniL2 expects.
        image_size = 28 if "mnist" in name else 32
        num_labels = 10
        num_channels = 1 if "mnist" in name else 3

        def predict(self, x):
            return model(x)

    attack = CarliniL2(sess, Wrap(), batch_size=100, max_iterations=10000,
                       binary_search_steps=5, initial_const=1, targeted=True)
    adv = attack.attack(data.test_data[:100], get_labs(data.test_labels[:100]))
    np.save("/tmp/" + name, adv)
    # mean per-image L2 norm of the perturbations
    print(np.mean(np.sum((adv - data.test_data[:100])**2, axis=(1, 2, 3))**.5))


# Attack every trained variant of both datasets.
attack(MNIST(), "mnist")
attack(MNIST(), "mnist_brelu")
attack(MNIST(), "mnist_gaussian")
attack(MNIST(), "mnist_gaussian_brelu")
attack(CIFAR(), "cifar")
attack(CIFAR(), "cifar_brelu")
attack(CIFAR(), "cifar_gaussian")
attack(CIFAR(), "cifar_gaussian_brelu")
def expandImage(image_data):
    """Map image data from the attack range [-0.5, 0.5] back to pixel values [0, 255]."""
    image_data2 = np.array(image_data)
    image_data2 = (image_data2 + 0.5) * 255
    return image_data2


# In[4]:

if __name__ == "__main__":
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # grab GPU memory on demand, not all at once
    with tf.Session(config=config) as sess:
        modelPath = '%smodels/mnist' % (nn_robust_attack_root)
        data, model = MNIST(), MNISTModel(modelPath, sess)
        attack = CarliniLi(sess, model, max_iterations=1000, targeted=False)

        inputs, targets = generate_data(data, samples=1000, targeted=False,
                                        start=5500, inception=False)

        # Evaluation counters (this chunk is truncated; their use follows elsewhere).
        original_classified_wrong_number = 0  # benign samples that are misclassified
        disturbed_failure_number = 0  # samples where no adversarial counterpart was crafted
        test_number = 0  # adversarial samples that we generate
        TTP = 0
        TP = 0
        FN = 0
# --- tail of an argparse setup; the parser construction starts outside this chunk ---
                    help='number of epochs')
parser.add_argument('--overwrite', action='store_true',
                    help='overwrite output file')
args = parser.parse_args()
print(args)
nlayers = len(args.layer_parameters) + 1
# Default model path encodes layer count, activation, and first hidden width.
if not args.modelfile:
    file_name = args.modelpath + "/" + args.model + "_" + str(
        nlayers) + "layer_" + args.activation + "_" + args.layer_parameters[0]
else:
    file_name = args.modelfile
print("Model will be saved to", file_name)
# refuse to clobber an existing model unless --overwrite was given
if os.path.isfile(file_name) and not args.overwrite:
    raise RuntimeError("model {} exists.".format(file_name))

if args.model == "mnist":
    data = MNIST()
elif args.model == "cifar":
    data = CIFAR()

train(data, file_name=file_name, params=args.layer_parameters,
      num_epochs=args.epochs, lr=args.lr, decay=args.wd,
      activation=args.activation, activation_param=args.leaky_slope,
      grad_reg=args.gradreg, dropout_rate=args.dropout)
# now train the teacher at the given temperature teacher = train(data, file_name+"_teacher", params, num_epochs, batch_size, train_temp, init=file_name+"_init") # evaluate the labels at temperature t predicted = teacher.predict(data.train_data) y = tf.nn.softmax(predicted/train_temp) print(y) data.train_labels = y # train the student model at temperature t student = train(data, file_name, params, num_epochs, batch_size, train_temp, init=file_name+"_init") # and finally we predict at temperature 1 predicted = student.predict(data.train_data) print(predicted) if not os.path.isdir('models'): os.makedirs('models') train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50) train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50) train_distillation(MNIST(), "models/mnist-distilled-100", [32, 32, 64, 64, 200, 200], num_epochs=50, train_temp=100) train_distillation(CIFAR(), "models/cifar-distilled-100", [64, 64, 128, 128, 256, 256], num_epochs=50, train_temp=100)
model.fit(data.train_data, data.train_labels, batch_size=batch_size, validation_data=(data.validation_data, data.validation_labels), epochs=num_epochs, shuffle=True) # save model to a file if file_name != None: model.save(file_name) return model if not os.path.isdir('models'): os.makedirs('models') if __name__ == "__main__": train(MNIST(), file_name="models/mnist_2layer", params=[1024], num_epochs=50, lr=0.1, decay=1e-3) train(CIFAR(), file_name="models/cifar_2layer", params=[1024], num_epochs=50, lr=0.2, decay=1e-3)
              num_epochs=100, batch_size=256, if_save=True):
        # --- tail of DAE.train(); the first parameters of the signature lie outside this chunk ---
        self.model.compile(loss='mean_squared_error',
                           metrics=['mean_squared_error'],
                           optimizer='adam')

        # Denoising setup: corrupt the inputs with Gaussian noise (scaled by
        # self.v_noise) but teach the autoencoder to reproduce the clean images.
        noise = self.v_noise * np.random.normal(size=np.shape(data.train_data))
        noisy_train_data = data.train_data + noise
        noisy_train_data = np.clip(noisy_train_data, -0.5, 0.5)

        self.model.fit(noisy_train_data, data.train_data,
                       batch_size=batch_size,
                       validation_data=(data.validation_data, data.validation_data),
                       epochs=num_epochs,
                       shuffle=True)

        if if_save:
            self.model.save(os.path.join(self.model_dir, archive_name))

    def load(self, archive_name, model_dir=None):
        """Load previously saved autoencoder weights from ``model_dir``
        (defaults to the directory the model was trained into)."""
        if model_dir is None:
            model_dir = self.model_dir
        self.model.load_weights(os.path.join(model_dir, archive_name))


if __name__ == '__main__':
    AE = DAE()
    AE.train(MNIST(), "mnist")
def main(args):
    """Drive a black-box adversarial attack (ZOO, ZOO-AE, AutoZOOM-BiLIN or
    AutoZOOM-AE) against a classifier and save original/adversarial/diff
    images plus per-image success statistics.

    args: dict of CLI options — dataset, attack_method, attack_type,
    num_img, img_offset, batch_size, img_resize, codec_prefix,
    compress_mode, save_path, etc. (schema comes from the caller's
    argparse setup, not visible here — confirm against it).
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        # Pick dataset + victim model; models are restored from fixed
        # relative paths under models/.
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(data_path=args["imagenet_dir"], targetFile=args["attack_single_img"]), InceptionModel(sess, use_softmax=True)
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        # elif args['dataset'] == "imagenet_np":

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        if args["attack_single_img"]:
            # manually setup attack set
            # attacking only one image with random attack]
            # NOTE(review): this branch sets orig_img / orig_labels /
            # orig_img_id, but the attack loop below iterates
            # all_orig_img_id / all_orig_img — the single-image path looks
            # like it would hit a NameError; confirm intended usage.
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])

            if args["attack_type"] == "targeted":
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels
        else:
            # generate attack set
            # ImageNet labels are shifted by one relative to the model
            # output indexing (presumably the background class) — TODO
            # confirm against util.generate_attack_data_set.
            if args["dataset"] == "imagenet" or args[
                    "dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False

            if args["random_target"] and (args["dataset"] == "imagenet" or
                                          args["dataset"] == "imagenet_np"):
                # find all possible class
                all_class = np.unique(np.argmax(data.test_labels, 1))
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    random_target_class=all_class,
                    shift_index=shift_index)
            elif args["random_target"]:
                # random target on all possible classes
                class_num = data.test_labels.shape[1]
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    random_target_class=list(range(class_num)),
                    shift_index=shift_index)
            else:
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    shift_index=shift_index)

        # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(i, tar_lab, orig_lab, orig_img_id[i]))

        # attack related settings
        # zoo / autozoom_bilin operate directly in image space, so the
        # attack resolution defaults to the model's native image size.
        if args["attack_method"] == "zoo" or args[
                "attack_method"] == "autozoom_bilin":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        # ZOO-family attacks evaluate many coordinates per step; others
        # are forced to batch_size 1.
        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))
        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
            args["batch_size"] = 1  # force to be 1

        # AE-based attacks search in the autoencoder's latent space; load
        # the codec and take the attack resolution from the decoder input.
        if args["attack_method"] == "zoo_ae" or args[
                "attack_method"] == "autozoom_ae":
            #_, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])
        # NOTE(review): shells out for directory creation; os.makedirs
        # would be portable, left unchanged to preserve behavior.
        os.system("mkdir -p {}".format(save_prefix))

        total_success = 0
        l2_total = 0

        # Attack each selected image in turn and log running success rate
        # and average L2 distortion over successful attacks.
        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]
            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))

            if args["attack_method"] == "zoo_ae" or args[
                    "attack_method"] == "autozoom_ae":
                # print ae info — reconstruction MSE of the codec on this
                # input, as a sanity check of autoencoder quality.
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # NOTE(review): scipy.misc.imresize is removed in
                    # modern SciPy; this path requires an old SciPy + PIL.
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)
            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)
            # diff image (halved so the signed perturbation fits the
            # image value range used by save_img — TODO confirm)
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
epochs=num_epochs, shuffle=True) # save model to a file if file_name != None: model.save(file_name) return {'model':model, 'history':history} if not os.path.isdir('models'): os.makedirs('models') if __name__ == '__main__': train(MNIST(), file_name="models/mnist_resnet_2", nlayer=2, activation = tf.nn.relu) train(MNIST(), file_name="models/mnist_resnet_3", nlayer=3, activation = tf.nn.relu) train(MNIST(), file_name="models/mnist_resnet_4", nlayer=4, activation = tf.nn.relu) train(MNIST(), file_name="models/mnist_resnet_5", nlayer=5, activation = tf.nn.relu) train(MNIST(), file_name="models/mnist_resnet_2_sigmoid", nlayer=2, activation = tf.sigmoid) train(MNIST(), file_name="models/mnist_resnet_3_sigmoid", nlayer=3, activation = tf.sigmoid) train(MNIST(), file_name="models/mnist_resnet_4_sigmoid", nlayer=4, activation = tf.sigmoid) train(MNIST(), file_name="models/mnist_resnet_5_sigmoid", nlayer=5, activation = tf.sigmoid) train(MNIST(), file_name="models/mnist_resnet_2_tanh", nlayer=2, activation = tf.tanh) train(MNIST(), file_name="models/mnist_resnet_3_tanh", nlayer=3, activation = tf.tanh) train(MNIST(), file_name="models/mnist_resnet_4_tanh", nlayer=4, activation = tf.tanh) train(MNIST(), file_name="models/mnist_resnet_5_tanh", nlayer=5, activation = tf.tanh) train(MNIST(), file_name="models/mnist_resnet_2_atan", nlayer=2, activation = tf.atan)
def main(args):
    """Run a targeted attack (Carlini L2, EAD L1/EN, or (I)FGM variants)
    and report best/average/worst-case success rate and L1/L2/Linf
    distortion over each batch of targets.

    args: dict of CLI options (dataset, attack, numimg, batch_size,
    maxiter, conf, binary_steps, beta, abort_early, train, show, save,
    seed, ...) — schema defined by the caller's argparse, not visible here.
    """
    with tf.Session() as sess:
        # Dataset + victim model selection.
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # NOTE(review): unlike the sibling variant of this script, these
        # overrides do not check args['dataset'] — passing adversarial !=
        # "none" replaces the model with an MNIST one even for cifar /
        # imagenet. Confirm whether that is intended.
        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)
        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])

        # Dispatch on attack name; each branch builds the attack object
        # and produces the adversarial batch `adv`.
        timestart = time.time()
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        # (bare string below is a no-op statement kept from the original;
        # it documents the FGM/IGM calling convention)
        """If untargeted, pass labels instead of targets"""
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        # In training mode only dump the adversarial examples and labels.
        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        # Per-batch statistics accumulators: r_* are 0/1 success flags,
        # d_*_l1/l2/linf the corresponding distortions.
        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # NOTE(review): evaluates against the distilled model when conf != 0
        # (transferability check, presumably) — confirm this is intended
        # for non-mnist datasets too.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        # Each batch holds one source image attacked toward batch_size
        # different targets; best = minimum-distortion success, worst =
        # all targets must succeed, average = one random target.
        for i in range(0, len(inputs), args['batch_size']):
            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            # Best case: among successful targets, keep minimal distortion.
            # 1e10 doubles as "not found" sentinel for the indices.
            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: evaluate one randomly chosen target per batch.
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r,
                          1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                r_average.append(1)
                d_average_l2.append(
                    np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(
                    np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(
                    np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
            else:
                r_average.append(0)

            # Worst case: success only if every target succeeded; track the
            # maximal distortion across the batch.
            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_worst.append(1)

            # Optionally dump original/adversarial PNGs with a descriptive
            # filename suffix.
            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id, adv_id,
                        adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))
                    show(
                        inputs[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/original_{}.png".format(suffix))
                    show(
                        adv[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
if init != None: model.load_weights(init) def fn(correct, predicted): return tf.nn.softmax_cross_entropy_with_logits(labels=correct, logits=predicted / train_temp) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss=fn, optimizer=sgd, metrics=['accuracy']) model.fit(data.train_data, data.train_labels, batch_size=batch_size, validation_data=(data.validation_data, data.validation_labels), nb_epoch=num_epochs, shuffle=True) if file_name != None: model.save(file_name) return model if not os.path.isdir('models'): os.makedirs('models') #train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50) train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=1)
def main(args):
    """Extended attack-evaluation driver: supports targeted and untargeted
    attacks, loading/saving adversarial examples as .npy, and a simple
    transferability test against a second (distilled) model.

    args: dict of CLI options — dataset, attack, untargeted, targetnum,
    restore_np/save_np, targetmodel, eps, etc. (argparse schema defined by
    the caller, not visible here).
    """
    with tf.Session() as sess:
        # Dataset + victim model; adversarially-trained or distilled
        # checkpoints override the default per dataset.
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])

        timestart = time.time()
        # Either restore previously computed adversarial examples from
        # disk, or run the selected attack.
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            # (bare string below is a no-op statement kept from the
            # original; it documents the FGM/IFGM calling convention)
            """If untargeted, pass labels instead of targets"""
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
        timeend = time.time()

        # Untargeted runs have one "target" (the true label) per image.
        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                # NOTE(review): parenthesis placement differs from the
                # branch above — str(args['attack'] + '.npy') concatenates
                # inside str(); result is the same filename, but the
                # inconsistency looks accidental.
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)

        # NOTE(review): these trailing-underscore lists are never appended
        # to or read below — apparently dead code.
        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        #Transferability Tests
        # model_[0] is the source model; optionally append a distilled
        # target model and evaluate the same adv examples on both.
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        for m, model in enumerate(model_):
            # Fresh per-model accumulators (same best/average/worst scheme
            # as the sibling script).
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                # Best case: minimum distortion among successful targets.
                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                # Average case: one randomly chosen target per batch.
                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
                else:
                    r_average.append(0)

                # Worst case: all targets must succeed; track maximum
                # distortion across the batch.
                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                # Save PNGs only for the last (target) model.
                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))
                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))

            # Label the printed stats by role: source model(s) vs. the
            # final (target) model of the transferability test.
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
## Copyright (C) 2016, Nicholas Carlini <*****@*****.**>. ## ## This program is licenced under the BSD 2-Clause licence, ## contained in the LICENCE file in this directory. from setup_cifar import CIFAR, CIFARModel from setup_mnist import MNIST, MNISTModel from setup_inception import ImageNet, InceptionModel import tensorflow as tf import numpy as np BATCH_SIZE = 1 with tf.Session() as sess: data, model = MNIST(), MNISTModel("models/mnist", sess) data, model = CIFAR(), CIFARModel("models/cifar", sess) data, model = ImageNet(), InceptionModel(sess) x = tf.placeholder( tf.float32, (None, model.image_size, model.image_size, model.num_channels)) y = model.predict(x) r = [] for i in range(0, len(data.test_data), BATCH_SIZE): pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]}) #print(pred) #print('real',data.test_labels[i],'pred',np.argmax(pred)) r.append( np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
def main(args):
    """Run a white-box (Carlini L2) or black-box (coordinate-wise ZOO-style)
    attack, passing inputs through a thermometer/temperature encoder before
    classification, and report origin vs. adversarial accuracy.

    args: dict of CLI options (dataset, attack, level, numimg, firstimg,
    maxiter, lr, init_const, binary_steps, untargeted, solver, ...) —
    argparse schema defined by the caller, not visible here.
    """
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model = MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            #data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess,
                                              use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')

        # numimg == 0 means "use the whole remaining test set".
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')

        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')
        #print('all_inputs : ', all_inputs.shape)
        #print('encoding_all : ',encoding_all.shape)

        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))

        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0

        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            # Pre-encoded version of the input used for classification
            # (the attack itself operates on the raw `inputs`).
            encoding_inputs = encoding_all[i:i + 1]
            #print('encoding_inputs shape: ', encoding_inputs)
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)

            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue

            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            ##### llj
            # Re-encode the adversarial image channel-by-channel through
            # the temperature encoder before classifying (assumes a
            # 3-channel NHWC image — TODO confirm for mnist).
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            channel0, channel1, channel2 = encode_adv[:,
                                                      0, :, :], encode_adv[:,
                                                                           1, :, :], encode_adv[:,
                                                                                                2, :, :]
            channel0, channel1, channel2 = temp_encoder.tempencoding(
                channel0), temp_encoder.tempencoding(
                    channel1), temp_encoder.tempencoding(channel2)
            encode_adv = np.concatenate([channel0, channel1, channel2], axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))
            #### llj

            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])

            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            # Distortion cap: attacks above L2 20.0 are counted as failures.
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion

            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs, "{}/{}/{}_original_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            show(
                adv, "{}/{}/{}_adversarial_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            show(
                adv - inputs, "{}/{}/{}_diff_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        print(' origin accuracy : ',
              100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
def main(args):
    """Run a zeroth-order (ZO) black-box adversarial attack on a batch of images.

    Supported optimizers (``args['mode']``): ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM,
    ZOSMD, ZOPSGD, ZONES.  Pixel values are assumed to be in [-0.5, 0.5] for
    MNIST/CIFAR.  Results (loss curves, distortions, iteration counts) are
    written to an .npz file named from the experiment parameters.

    Args:
        args: dict of experiment parameters — 'class_id', 'target_id',
            'maxiter', 'init_const', 'kappa', 'q', 'mode', 'save_iteration',
            'dataset', 'targeted_attack', 'mini_batch_sz', 'lr_idx', 'mu',
            'lr', 'decay_lr', 'constraint', 'print_iteration', 'exp_code'.

    Raises:
        ValueError: if the dataset is unknown or there are not enough
            correctly-classified images to fill the mini-batch.
    """
    with tf.Session() as sess:
        # Fix all random seeds for reproducibility.
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']            # true class of the natural examples
        target_id = args['target_id']          # target class id (used only for targeted attack)
        arg_max_iter = args['maxiter']         # max number of iterations
        arg_init_const = args['init_const']    # regularization constant on the attack loss
        arg_kappa = args['kappa']              # attack confidence level
        arg_q = args['q']                      # number of random direction vectors
        arg_mode = args['mode']                # algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]                # unused here; kept so a missing key fails fast

        # Load classifier.  For MNIST and CIFAR the pixel value range is [-0.5, 0.5].
        if arg_Dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif arg_Dataset == 'cifar10':
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif arg_Dataset == 'imagenet':
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
        else:
            # Fail fast instead of continuing with undefined `data`/`model`.
            raise ValueError('Please specify a valid dataset')

        # All test images whose true label is class_id.
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]

        # Take 30 or fewer images to make sure arg_bsz of them are valid.
        _, orig_class = util.model_prediction_u(model, orig_img[:30])

        # Filter out images that are already misclassified.
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            # Original code had `assert 'no enough valid inputs'`, which is a
            # no-op (assert on a truthy string).  Raise a real error instead.
            raise ValueError('not enough valid inputs: %d < %d'
                             % (orig_img.shape[0], arg_bsz))
        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)
        true_label = class_id

        if arg_targeted_attack:  # targeted attack
            target_label = target_id
        else:
            target_label = true_label

        ## parameters
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size        # feature dimension (single image)
        else:
            d = orig_img[0].size     # feature dimension (per image in the batch)
        print("dimension = ", d)

        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        # Flatten images to vectors, one row per image.
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        # Adversarial variable initialization.
        if args["constraint"] == 'uncons':
            # Optimize in arctanh space; * 0.999999 avoids arctanh(+-1) = +-inf
            # (orig_img_vec is in [-0.5, 0.5]).
            w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        delta_adv = np.zeros((1, d))  # adversarial perturbation (shared across batch)

        # Best solution / best loss bookkeeping.
        best_adv_img = []                    # successful adv image in [-0.5, 0.5]
        best_delta = []                      # best perturbation
        best_distortion = (0.5 * d) ** 2     # distortion threshold to beat
        total_loss = np.zeros(I)
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True                    # record only the first successful attack

        mu = args["mu"]      # smoothing parameter for the ZO gradient estimator
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            # AdaMM state: first/second moment and the running max (AMSGrad).
            v_init = 1e-7
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))
            m = np.zeros((1, d))
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        for i in range(I):
            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation.
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)
            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## Gradient estimation w.r.t. w_img_vec.
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(
                    mu, q, w_img_vec, d, kappa, target_label, const, model,
                    orig_img, arg_targeted_attack, args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(
                    mu, q, w_img_vec, d, kappa, target_label, const, model,
                    orig_img, arg_targeted_attack, args["constraint"])
            else:
                grad_est = gradient_estimation_v2(
                    mu, q, w_img_vec, d, kappa, target_label, const, model,
                    orig_img, arg_targeted_attack, args["constraint"])

            ## ZO attack update step (one branch per optimizer).
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)
                v_hat = np.maximum(v_hat, v)
                # NOTE(review): the step divides by sqrt(v), not sqrt(v_hat);
                # AMSGrad would use v_hat (v_hat IS used in the projection
                # metric below) — confirm this asymmetry is intended.
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            ## Adversarial example update.
            w_img_vec = w_ori_img_vec + delta_adv

            ## Convert back to adv_img in [-0.5, 0.5].
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
            else:
                adv_img_vec = w_img_vec.copy()
            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## Track the best solution over the iterations.
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)  # best non-target class prob

            # Periodic checkpoint of the perturbation.
            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM":
                    print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                # Every 10 iterations, or as soon as some image flips class.
                if (np.logical_or(true_label != np.argmax(attack_prob, 1),
                                  np.remainder(i + 1, 10) == 0)):
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            if arg_targeted_attack:
                # Attack succeeds when the target logit margin exceeds kappa
                # for every image in the batch.
                if ((np.log(target_prob + 1e-10) -
                     np.log(other_prob + 1e-10)) >= kappa).all():
                    if distortion(adv_img, orig_img) < best_distortion:
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record the first successful attack only.
                        if first_flag:
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                # Untargeted: any non-true class must dominate by kappa.
                if ((np.log(other_prob + 1e-10) -
                     np.log(target_prob + 1e-10)) >= kappa).all():
                    if distortion(adv_img, orig_img) < best_distortion:
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record the first successful attack only.
                        if first_flag:
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            first_class = adv_class
                            first_iteration = i + 1

        ## Save results.
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id, arg_mode, args["constraint"], str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        if attack_flag:
            # NOTE(review): 'best_iteation' key kept as-is (typo) because
            # downstream loaders may read the npz by that name — verify
            # before renaming.
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
        else:
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
def run(args, restrict=True):
    """Attack a K-NN-defended model with SPSA for one experiment configuration.

    Writes natural and adversarial accuracy over N sampled test images to
    ``<dataset>/<epsilon>_<mode>_<K>.txt``.

    Args:
        args: sequence ``[dataset, epsilon, mode, K]`` where dataset is
            "MNIST" or "CIFAR", epsilon the attack budget, mode the
            representation name, K the defense's neighbor count.
        restrict: when True, pin this worker process to a single GPU
            derived from its multiprocessing worker index.

    Raises:
        ValueError: if ``args[0]`` is not a known dataset.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Pool workers are named e.g. "PoolWorker-3"; the trailing 1-based
        # index selects GPU index-1.  (Was np.int, removed in NumPy 1.24;
        # also renamed to avoid shadowing the builtin `id`.)
        proc_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(proc_id - 1)

    # Load parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix random seeds.  The tf seed must be set after clear_session();
    # having it before causes a hang for some reason.
    np.random.seed(1)
    tf.set_random_seed(1)

    # Load model/data and set up SPSA placeholders
    N = 50  # number of test images to attack
    if dataset == "MNIST":
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        shape_spsa = (1, 28, 28, 1)  # SPSA attacks one image at a time
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    else:
        # Fail fast instead of continuing with undefined placeholders.
        raise ValueError("unknown dataset: " + str(dataset))
    y_spsa = tf.placeholder(tf.int32)

    # Hidden representations of the real and adversarial training examples.
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Labels: -1 for real, +1 for adversarial representations.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack; `with` guarantees the report file is closed on error.
    with open(fname + ".txt", "w") as f:
        sample = np.random.choice(data.test_data.shape[0], N, replace=False)
        x_sample = data.test_data[sample]
        y_sample = np.argmax(data.test_labels[sample], axis=1)

        logits_nat = sess.run(defended_logits, {x: x_sample})
        f.write("Accuracy on Natural Images: " +
                str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")

        # Attack each sampled image individually and record the defended
        # model's prediction on the adversarial result.
        pred_adv = -1.0 * np.ones((N))
        for i in range(N):
            x_real = x_sample[i].reshape(shape_spsa)
            x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
            pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))

        f.write("Accuracy on Adversarial Images: " +
                str(np.mean(pred_adv == y_sample)))