def main():
    """Train a plain-ResNet CIFAR classifier with SGD and data augmentation.

    Relies on module-level names defined elsewhere in the project:
    ``CIFARModel``, ``CIFAR``, ``SGD``, ``LearningRateScheduler``,
    ``ImageDataGenerator``, the loss ``fn`` and the schedule ``get_lr``.
    Saves the trained weights to ``Models/<tag>``.
    """
    batch_size = 128
    tag = "GBP_0"
    model = CIFARModel().model  # pure resnet
    data = CIFAR(tag)
    # lr=0.00 is only a placeholder: the effective rate is set every epoch
    # by the LearningRateScheduler callback via get_lr.
    sgd = SGD(lr=0.00, momentum=0.9, nesterov=False)
    schedule = LearningRateScheduler(get_lr)
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])
    # Standard light augmentation for CIFAR-style 32x32 images.
    datagen = ImageDataGenerator(rotation_range=10,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True)
    datagen.fit(data.train_data)
    model.fit_generator(datagen.flow(data.train_data, data.train_labels,
                                     batch_size=batch_size),
                        steps_per_epoch=data.train_data.shape[0] // batch_size,
                        epochs=300,
                        verbose=1,
                        validation_data=(data.test_data, data.test_labels),
                        callbacks=[schedule])
    model.save_weights('Models/{}'.format(tag))
def main(_):
    """Craft FGSM adversarial train/test sets and evaluate the victim model.

    Loads the MNIST or Cifar model selected by ``FLAGS.dataset``, builds an
    FGSM attack graph, saves the adversarial arrays under
    ``adversarial_outputs/``, and prints clean vs. adversarial accuracy.

    NOTE(review): the FGSM calls use a placeholder ``x`` that is not defined
    in this function — presumably a module-level input placeholder; confirm.
    """
    with tf.Session() as sess:
        K.set_session(sess)
        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
        elif FLAGS.dataset == 'Cifar':  # BUG FIX: was FLAGS.datset (typo)
            data, model = CIFAR(), CIFARModel("models/cifar", sess)

        def _model_fn(x, logits=False):
            # Adapter so the attack can request probabilities or logits.
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        if FLAGS.dataset == 'MNIST':
            x_adv = fgsm(_model_fn, x, epochs=9, eps=0.02)
        elif FLAGS.dataset == 'Cifar':  # BUG FIX: was FLAGS.datset (typo)
            x_adv = fgsm(_model_fn, x, epochs=4, eps=0.01)

        # Generate adversarial versions of both splits.
        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)
        np.save('adversarial_outputs/fgsm_train_' + FLAGS.dataset.lower() + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + FLAGS.dataset.lower() + '.npy', X_adv_test)

        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)
        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
def DCN(trainpath, testpath, modelpath, dstl=False, target=True):
    """Run the detector + corrector pipeline and report its metrics.

    :param trainpath: pickle path of the (adversarial) training data
    :param testpath: pickle path of the test data
    :param modelpath: path of the CIFAR region model used by the corrector
    :param dstl: if True, apply the ``dstl`` preprocessing to the test set
    :param target: targeted-attack flag; divides attack success by 9
                   (presumably the number of wrong target classes — confirm)
    """
    train = load_data(trainpath)
    test = load_data(testpath)
    #good = load_data('data/mnist5kgood55k.pkl')
    if dstl:
        # train.dstl()
        test.dstl()
    if target:
        para = 9
    else:
        para = 1
    # binary_model returns the detector and its accuracy on adversarial data.
    model, adv_accu = binary_model(train, test)
    false = detect(model, test)  # indices flagged by the detector, presumably
    print('**********corrector************')
    region_model = CIFARModel(modelpath)
    t5 = time.time()
    c = Corrector(region_model, testpath, false, target=target, r=0.02, n=50)
    # error: assumed (errors on clean inputs, surviving adversarial inputs)
    # — TODO confirm against Corrector.correct().
    error = c.correct()
    t6 = time.time()
    accuracy_good = (test.num - error[0]) / test.num
    attack_success = (1 - adv_accu) + error[1] / test.num / para
    print('accuracy_good:', accuracy_good)
    print('attack_success:', attack_success)
    print('time:', t6 - t5)
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test, nb_epochs,
              batch_size, learning_rate):
    """Build and fit the "remote" black-box oracle the attack will query.

    :param sess: the TF session
    :param x: input placeholder
    :param y: output placeholder
    :param X_train: oracle training images
    :param Y_train: oracle training labels
    :param X_test: oracle evaluation images
    :param Y_test: oracle evaluation labels
    :param nb_epochs: number of training epochs
    :param batch_size: minibatch size
    :param learning_rate: optimizer step size
    :return: (model, predictions, accuracy)
    """
    # Architecture follows the global DATASET switch.
    oracle = (MNISTModel(use_log=True).model if DATASET == "mnist"
              else CIFARModel(use_log=True).model)
    preds = oracle(x)
    print("Defined TensorFlow model graph.")

    if FLAGS.load_pretrain:
        # Restore previously saved weights instead of retraining.
        tf_model_load(sess)
    else:
        model_train(sess, x, y, preds, X_train, Y_train,
                    verbose=True, save=True,
                    args={'nb_epochs': nb_epochs,
                          'batch_size': batch_size,
                          'learning_rate': learning_rate})

    # Report the oracle's accuracy on clean held-out data.
    acc = model_eval(sess, x, y, preds, X_test, Y_test,
                     args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(acc))

    return oracle, preds, acc
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes, nb_epochs_s,
              batch_size, learning_rate, data_aug, lmbda):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :return: (substitute model, its prediction tensor)
    """
    # Define TF model graph (for the substitute model)
    model_sub = CIFARModel(use_log = True).model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        # init_all=False: keep the black-box variables already in the graph.
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation (doubles the dataset:
            # second half is the perturbed copies of the first half).
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box.
            # Duplicate labels first, then overwrite the new half below.
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub)/2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def main(args):
    """Run a Carlini-L2 or ADMM-L0 targeted attack and report distortions.

    Selects dataset/model from ``args``, generates targeted attack data,
    runs the chosen attack, optionally saves the adversarial training set,
    then dispatches to L1/L2/Linf or L0 distortion reporting.

    NOTE(review): ``attack`` is only bound when args['attack'] is 'L2C' or
    'L0A'; any other value raises NameError at attack.attack(...) — confirm
    the CLI restricts the choices.
    """
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # Optional model overrides: adversarially-trained or distilled nets.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], targeted=True, start=0,
            inception=inception, handpick=handpick, seed=args['seed'])
        #print(true_ids)

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L0A':
            attack = ADMML0(sess, model, batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # With nonzero confidence, evaluate transfer against the distilled
        # MNIST model (loaded regardless of dataset — looks intentional for
        # the MNIST transferability experiment; confirm).
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        # L0-family attacks get their own distortion computation.
        if args['attack'] != 'L0A' and args['attack'] != 'L0AE' and args[
                'attack'] != 'L0C' and args['attack'] != 'L0AE2':
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids)
        else:
            l0_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids)
mask_min = mask.min() mask_max = mask.max() mask_vis = (mask - mask_min) / (mask_max - mask_min) ax[1].imshow(mask_vis, cmap='jet', alpha=0.6) ax[1].axis('off') return fig FLAGS = flags.FLAGS cifar_data = CIFAR() test_idx = FLAGS.test_idx test_img = cifar_data.test_data[test_idx] + 0.5 tf.reset_default_graph() with tf.Session() as sess: model = CIFARModel('cifar10_model', sess, False) input_holder = tf.placeholder(tf.float32, [1, 32, 32, 3], name='x') model_out = model(input_holder) mask_net = NeuroMask(model, coeffs=(0.4, 0.35, FLAGS.smooth_lambda), temp=1, is_cifar=True) mask_net.init_model(sess) pred_ = sess.run(model_out, feed_dict={input_holder: [test_img]}) print('correct label = ', np.argmax(cifar_data.test_labels[test_idx], axis=0)) mask_result = mask_net.explain(sess, test_img, target_label=None, iters=FLAGS.num_iters)
def main(args):
    """Zeroth-order (ZO) black-box attack driver.

    Runs either the ZO-minmax (MAX_W) or ZO-finite-sum formulation of a
    multi-model, multi-class black-box attack.  Supports several ZO
    optimizers (ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM, ZOSMD, ZOPSGD, ZONES,
    ZOPGD) with optional box-projection for the constrained case, and
    saves loss/weight/success-rate traces to an .npz file.

    NOTE(review): uses several module-level globals (SEED, MAX_W, GPUs,
    sess, util, and the gradient-estimation / projection helpers).
    """
    random.seed(SEED)
    np.random.seed(SEED)
    tf.set_random_seed(SEED)

    print('ZO-minmax case') if MAX_W else print('ZO-Finite-Sum case')
    args["minmax"] = MAX_W

    class_id = args['class_id']  ### input image (natural example)
    target_id = args['target_id']  ### target images id (adv example) if target attack
    arg_max_iter = args['maxiter']  ### max number of iterations
    arg_init_const = args['init_const']  ### regularization prior to attack loss
    arg_kappa = args['kappa']  ### attack confidence level
    arg_q = args['q']  ### number of random direction vectors
    arg_mode = args['mode']  ### algorithm name
    arg_save_iteration = args['save_iteration']
    arg_Dataset = args["dataset"]
    arg_targeted_attack = args["targeted_attack"]
    #arg_models = args["models_number"]
    arg_bsz = args["mini_batch_sz"]
    idx_lr = args["lr_idx"]

    class_number = len(class_id)

    ## load classofier For MNIST and CIFAR pixel value range is [-0.5,0.5]
    if (arg_Dataset == 'mnist'):
        data, model = MNIST(), MNISTModel("models/mnist", sess, True)
    elif (arg_Dataset == 'cifar10'):
        data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
    elif (arg_Dataset == 'imagenet'):
        # ImageNet: attack an ensemble (Inception + ResNet50), one TF graph
        # and session per model so they can live on different GPUs.
        data = ImageNet_Universal(SEED)
        g1 = tf.Graph()
        with g1.as_default():
            if GPUs:
                config1 = tf.ConfigProto(device_count={'GPU': 0})
                sess1 = tf.Session(graph=g1, config=config1)
            else:
                sess1 = tf.Session(graph=g1)
            model1 = InceptionModel(sess1, True)

        # g2=tf.Graph()
        # with g2.as_default():
        #     if GPUs:
        #         config2 = tf.ConfigProto(device_count = {'GPU': 1})
        #         sess2=tf.Session(graph=g2,config=config2)
        #     else:
        #         sess2=tf.Session(graph=g2)
        #     model2 = ResnetModel152(sess2, True)
        #
        g3 = tf.Graph()
        with g3.as_default():
            if GPUs:
                config3 = tf.ConfigProto(device_count={'GPU': 1})
                sess3 = tf.Session(graph=g3, config=config3)
            else:
                sess3 = tf.Session(graph=g3)
            model3 = ResnetModel50(sess3, True)

        models = [model1, model3]
    else:
        print('Please specify a valid dataset')

    # preprocess data for multiple classes
    orig_img, true_label, target_label = [], [], []
    for i in range(len(class_id)):
        #orig_img = np.load('ori_img_backup.npy')
        orig_img_ = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id[i])]
        #np.save('ori_img_backup',orig_img)
        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class1 = util.model_prediction_u(
            models[0], orig_img_
        )  # take 50 or less images to make sure arg_bsz number of them are valid
        _, orig_class2 = util.model_prediction_u(
            models[1], orig_img_
        )  # take 50 or less images to make sure arg_bsz number of them are valid
        #_, orig_class3 = util.model_prediction_u(models[2],orig_img_) # take 50 or less images to make sure arg_bsz number of them are valid

        # filter out the images which misclassified already
        orig_img_ = orig_img_[np.where((orig_class1 == class_id[i])
                                       & (orig_class2 == class_id[i]))]
        assert orig_img_.shape[0] >= arg_bsz, 'no enough valid inputs'

        orig_img.append(orig_img_[:arg_bsz])
        #np.save('original_imgsID'+str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label.append(class_id[i])  # [class_id[i]]*arg_bsz
        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label.append(target_id[i])
        else:
            target_label.append(class_id[i])

    #orig_img, target = util.generate_data(data, class_id, target_label)
    orig_img = np.array(orig_img)
    np.save('original_imgs_ID' + str(class_id), orig_img)
    print('input images shape', orig_img.shape)
    print('true label', true_label)
    print('target label', target_label)

    d = orig_img[0, 0].size  # flattened pixel dimension of one image
    print("dimension = ", d)

    # mu=1/d**2  # smoothing parameter
    q = arg_q + 0
    I = arg_max_iter + 0
    kappa = arg_kappa + 0
    const = arg_init_const + 0

    ## flatten image to vec
    orig_img_vec = np.resize(orig_img, (class_number, arg_bsz, d))

    ## w adv image initialization
    if args["constraint"] == 'uncons':
        # * 0.999999 to avoid +-0.5 return +-infinity
        w_ori_img_vec = np.arctanh(
            2 * (orig_img_vec) *
            0.999999)  # in real value, note that orig_img_vec in [-0.5, 0.5]
        w_img_vec = w_ori_img_vec.copy()
    else:
        w_ori_img_vec = orig_img_vec.copy()
        w_img_vec = w_ori_img_vec.copy()

    # ## test ##
    # for test_value in w_ori_img_vec[0, :]:
    #     if np.isnan(test_value) or np.isinf(test_value):
    #         print(test_value)

    delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

    # initialize the best solution & best loss
    best_adv_img = []  # successful adv image in [-0.5, 0.5]
    best_delta = []  # best perturbation
    best_distortion = (0.5 * d)**2  # threshold for best perturbation
    total_loss = np.zeros((I, len(models), class_number))  ## I: max iters
    l2s_loss_all = np.zeros((I, len(models), class_number))
    stationary = np.zeros(I)
    attack_flag = False
    first_flag = True  ## record first successful attack
    # ensemble/class weights start uniform
    weights = np.ones((len(models), class_number),
                      dtype=np.float32) * 1.0 / (len(models) * class_number)
    weights_record = np.zeros((I, len(models), class_number))
    sr = []  # running success rates (filled when print_iteration fires)

    # parameter setting for ZO gradient estimation
    mu = args["mu"]  ### smoothing parameter

    ## learning rate
    base_lr = args["lr"]

    if arg_mode == "ZOAdaMM":
        ## parameter initialization for AdaMM
        v_init = 1e-7  #0.00001
        v_hat = v_init * np.ones((1, d))
        v = v_init * np.ones((1, d))
        m = np.zeros((1, d))
        # momentum parameter for first and second order moment
        beta_1 = 0.9
        beta_2 = 0.3  # only used by AMSGrad
        print(beta_1, beta_2)

    #for i in tqdm(range(I)):
    for i in range(I):
        if args["decay_lr"]:
            base_lr = args["lr"] / np.sqrt(i + 1)

        ## gradient estimation w.r.t. w_img_vec
        if arg_mode == "ZOSCD":
            grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                             target_label, const, model,
                                             orig_img, arg_targeted_attack,
                                             args["constraint"])
        elif arg_mode == "ZONES":
            grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                               target_label, const, model,
                                               orig_img, arg_targeted_attack,
                                               args["constraint"])
        elif args["mode"] == "ZOPGD":
            # we use weights w instead const here
            grad_est = gradient_estimation_v3(mu, q, w_img_vec, d, kappa,
                                              target_label, weights, models,
                                              orig_img, arg_targeted_attack,
                                              args["constraint"], class_number)
        else:
            grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                              target_label, const, model,
                                              orig_img, arg_targeted_attack,
                                              args["constraint"])

        if args["mode"] == "ZOPGD":
            d_tmp = delta_adv.copy()  # kept for the stationarity-gap measure
            delta_adv = delta_adv - base_lr * grad_est
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X_temp = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box_models(delta_adv, orig_img_vec,
                                                  V_temp, -0.5, 0.5, 16 / 256)

        # if np.remainder(i,50)==0:
        #     print("total loss:",total_loss[i])
        #     print(np.linalg.norm(grad_est, np.inf))

        ## ZO-Attack, unconstrained optimization formulation
        if arg_mode == "ZOSGD":
            delta_adv = delta_adv - base_lr * grad_est
        if arg_mode == "ZOsignSGD":
            delta_adv = delta_adv - base_lr * np.sign(grad_est)
        if arg_mode == "ZOSCD":
            delta_adv = delta_adv - base_lr * grad_est
        if arg_mode == "ZOAdaMM":
            # AMSGrad-style moment updates.
            m = beta_1 * m + (1 - beta_1) * grad_est
            v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
            #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
            v_hat = np.maximum(v_hat, v)
            delta_adv = delta_adv - base_lr * m / np.sqrt(v)
            if args["constraint"] == 'cons':
                tmp = delta_adv.copy()
                #X_temp = orig_img_vec.reshape((-1,1))
                #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                V_temp = np.sqrt(v_hat.reshape(1, -1))
                delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                           0.5)
                #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
            # v_init = 1e-2 #0.00001
            # v = v_init * np.ones((1, d))
            # m = np.zeros((1, d))
            # # momentum parameter for first and second order moment
            # beta_1 = 0.9
            # beta_2 = 0.99  # only used by AMSGrad
            # m = beta_1 * m + (1-beta_1) * grad_est
            # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
            # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
            # if args["constraint"] == 'cons':
            #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
            #     X_temp = orig_img_vec.reshape((-1,1))
            #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
        if arg_mode == "ZOSMD":
            delta_adv = delta_adv - 0.5 * base_lr * grad_est
            # delta_adv = delta_adv - base_lr* grad_est
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X_temp = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)
        if arg_mode == "ZOPSGD":
            delta_adv = delta_adv - base_lr * grad_est
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X_temp = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)
        if arg_mode == "ZONES":
            delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)

        # if arg_mode == "ZO-AdaFom":
        #     m = beta_1 * m + (1-beta_1) * grad_est
        #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
        #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
        ##

        ### adv. example update
        w_img_vec = w_ori_img_vec + delta_adv

        ## Total loss evaluation
        if args["constraint"] == 'uncons':
            total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                w_img_vec, kappa, target_label, const, model, orig_img,
                arg_targeted_attack)
        else:
            # we are here
            # NOTE(review): loop variable `m` shadows the AdaMM first-moment
            # `m` above — with arg_mode == "ZOAdaMM" and a constrained run
            # this clobbers the momentum each iteration. Confirm intended.
            for m in range(len(models)):
                for n in range(class_number):
                    total_loss[i, m, n] = function_evaluation_cons_models(
                        w_img_vec[n], kappa, target_label[n], const,
                        models[m], orig_img[n], arg_targeted_attack)

        # solve max of w here (outer maximization over ensemble weights)
        if args["mode"] == "ZOPGD":
            if MAX_W:
                w_tmp = weights.copy()
                w_grad = total_loss[i] - 2 * args["lmd"] * (
                    weights - 1 / (len(models) * class_number))
                w_proj = weights + args["beta"] * w_grad
                weights = util.bisection(w_proj, 1, 1e-5, ub=1e5)
            weights_record[i] = weights
            if MAX_W:
                stationary[i] = util.stationary_gap(d_tmp, delta_adv, base_lr,
                                                    w_tmp, weights,
                                                    args["beta"])
                #print(stationary[i])

        ## covert back to adv_img in [-0.5 , 0.5]
        if args["constraint"] == 'uncons':
            adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
        else:
            adv_img_vec = w_img_vec.copy()

        adv_img = np.resize(adv_img_vec, orig_img.shape)

        ## print_iteration ## update the best solution in the iterations
        #print(weights)
        if args["print_iteration"]:
            if np.remainder(i + 1, 20) == 0:
                for m in range(len(models)):
                    for c in range(class_number):
                        #print('model',m,' class id',class_id[c])
                        attack_prob, _, _ = util.model_prediction(
                            models[m], adv_img[c])
                        target_prob = attack_prob[:, target_label[c]]
                        attack_prob_tmp = attack_prob.copy()
                        attack_prob_tmp[:, target_label[c]] = 0
                        other_prob = np.amax(attack_prob_tmp, 1)
                        sr.append(
                            np.sum(true_label[c] != np.argmax(attack_prob, 1))
                            / arg_bsz)
                        if (true_label[c] != np.argmax(attack_prob, 1)).all():
                            print(
                                "model %d class_id %d Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, TL = %d, PL = %s"
                                % (m, class_id[c], i + 1, class_id[c],
                                   args["lr"], int(args["decay_lr"]),
                                   arg_mode, args["constraint"],
                                   total_loss[i, m, c], true_label[c],
                                   np.argmax(attack_prob, 1)))
                        else:
                            print(
                                "model %d class_id %d Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, succ rate = %.2f"
                                % (m, class_id[c], i + 1, class_id[c],
                                   args["lr"], int(args["decay_lr"]),
                                   arg_mode, args["constraint"],
                                   total_loss[i, m, c], sr[-1]))
                print(weights)
                #print(np.max(np.abs(delta_adv)),np.min(w_img_vec),np.max(w_img_vec),np.sum(total_loss[i]),)
                print('sum of losses: ', np.sum(total_loss[i]),
                      'weighted loss', np.sum(total_loss[i] * weights))

        # Periodic checkpoint of the perturbation.
        # NOTE(review): attack_prob is only bound after the first
        # print_iteration pass — possible NameError otherwise; confirm.
        if i % 1000 == 0 and i != 0:
            if arg_mode == "ZOAdaMM":
                print(beta_1, beta_2)
            print("save delta_adv")
            np.save(
                'retimgs_nips/' + str(i) + 'itrs' +
                str(np.argmax(attack_prob, 1)) + arg_mode + str(args["lr"]) +
                str(args["lmd"]), delta_adv)
            #np.save('retimgs/'+str(i)+'itrs'+str(np.argmax(attack_prob,1))+arg_mode+str(args["lr"])+'_weights',weights_record)

        if arg_save_iteration:
            os.system("mkdir Examples")
            if (np.logical_or(true_label != np.argmax(attack_prob, 1),
                              np.remainder(i + 1,
                                           10) == 0)):  ## every 10 iterations
                suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                    class_id, arg_mode, true_label, np.argmax(attack_prob, 1),
                    i + 1)
                # util.save_img(adv_img, "Examples/{}.png".format(suffix))

    if (attack_flag):
        ## save data
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id[0], arg_mode, args["constraint"], str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        np.savez(
            "{}".format(suffix0),
            id=class_id,
            mode=arg_mode,
            loss=total_loss,
            weights=weights_record,
            sr=np.array(sr),
            stationary=stationary
            #best_distortion=best_distortion, first_distortion=first_distortion,
            #first_iteration=first_iteration, best_iteation=best_iteration,
            #learn_rate=args["lr"], decay_lr = args["decay_lr"], attack_flag = attack_flag
        )
        ## print
        # NOTE(review): first_iteration is never assigned in the visible
        # code; this print would raise NameError if attack_flag were True.
        print("It takes {} iteations to find the first attack".format(
            first_iteration))
        # print(total_loss)
    else:
        ## save data
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id[0], arg_mode, args["constraint"], str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        np.savez(
            "{}".format(suffix0),
            id=class_id,
            mode=arg_mode,
            loss=total_loss,
            weights=weights_record,
            sr=np.array(sr),
            stationary=stationary
            #best_distortion=best_distortion,
            #learn_rate=args["lr"], decay_lr = args["decay_lr"], attack_flag = attack_flag
        )
        print("Attack Fails")

    sys.stdout.flush()
def main(args):
    """Run an EAD/CW/FGM-family attack and report best/average/worst-case
    distortion statistics per batch.

    Dataset/model selection, optional distilled or adversarially-trained
    overrides, attack dispatch, then for every batch: the smallest
    successful distortion (best case), a random sample (average case), and
    the largest distortion with all-success requirement (worst case), for
    L1/L2/Linf. Optionally saves before/after images via ``show``.
    """
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)

        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], inception=inception,
            handpick=handpick, train=args['train'], seed=args['seed'])

        timestart = time.time()
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess, model, batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess, model, batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)

        """If untargeted, pass labels instead of targets"""
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess, model, batch_size=args['batch_size'],
                         ord=np.inf, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess, model, batch_size=args['batch_size'], ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess, model, batch_size=args['batch_size'], ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess, model, batch_size=args['batch_size'],
                         ord=np.inf, inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess, model, batch_size=args['batch_size'], ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess, model, batch_size=args['batch_size'], ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        # Training mode: just save the adversarial set and stop.
        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        # success flags and distortions for best / average / worst cases
        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # With nonzero confidence, evaluate transfer against the distilled
        # MNIST model (loaded regardless of dataset — confirm intended).
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        # Each batch contains several target classes for the same image set.
        for i in range(0, len(inputs), args['batch_size']):
            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            # Best case: minimum distortion among successful samples.
            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: one randomly chosen target per batch.
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r, 1) == np.argmax(
                    targets[rand_int:rand_int + 1], 1)):
                r_average.append(1)
                d_average_l2.append(
                    np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(
                    np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(
                    np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
            else:
                r_average.append(0)

            # Worst case: maximum distortion; counted successful only if
            # every target in the batch was hit.
            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_worst.append(1)

            # Optionally dump original/adversarial image pairs to disk.
            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id, adv_id,
                        adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))

                    show(
                        inputs[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/original_{}.png".format(suffix))
                    show(
                        adv[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
def load_model(self,
               dataset="mnist",
               model_name="2-layer",
               activation="relu",
               model=None,
               batch_size=0,
               compute_slope=False,
               order=1):
    """Load (or accept) a classifier and build the gradient-norm graph.

    model: if set to None, then load dataset with model_name. Otherwise use
        the model directly.
    dataset: mnist, cifar and imagenet. recommend to use mnist and cifar as
        a starting point.
    model_name: possible options are 2-layer, distilled, and normal; any
        other value is parsed as "N,M,opts" and loaded as an NLayerModel.
    activation: activation name used to pick the model file for "normal".
    batch_size: if non-zero, overrides the per-dataset default batch size.
    compute_slope: stored on self for later use by the bound computation.
    order: when 2, additionally builds Hessian-vector-product ops and
        power-iteration loops for eigenvalue estimation.

    Returns (self.img, self.output): the input placeholder and the
    network's output tensor.
    """
    from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
    from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
    from nlayer_model import NLayerModel
    from setup_imagenet import ImageNet, ImageNetModel
    # if set this to true, we will use the logit layer output instead of probability
    # the logit layer's gradients are usually larger and more stable
    output_logits = True
    self.dataset = dataset
    self.model_name = model_name
    if model is None:
        print('Loading model...')
        if dataset == "mnist":
            self.batch_size = 1024
            if model_name == "2-layer":
                model = TwoLayerMNISTModel("models/mnist_2layer", self.sess,
                                           not output_logits)
            elif model_name == "normal":
                if activation == "relu":
                    model = MNISTModel("models/mnist", self.sess,
                                       not output_logits)
                else:
                    print("actviation = {}".format(activation))
                    model = MNISTModel("models/mnist_cnn_7layer_" + activation,
                                       self.sess,
                                       not output_logits,
                                       activation=activation)
                    # brief pause after loading the non-relu model
                    # (presumably to let the operator see the printout;
                    # TODO confirm it is still needed)
                    time.sleep(5)
            elif model_name == "brelu":
                model = MNISTModel("models/mnist_brelu", self.sess,
                                   not output_logits, use_brelu=True)
            elif model_name == "distilled":
                model = MNISTModel("models/mnist-distilled-100", self.sess,
                                   not output_logits)
            else:
                # specify model parameters as N,M,opts
                model_params = model_name.split(",")
                if len(model_params) < 3:
                    raise (RuntimeError("incorrect model option" + model_name))
                numlayer = int(model_params[0])
                nhidden = int(model_params[1])
                modelfile = "models/mnist_{}layer_relu_{}_{}".format(
                    numlayer, nhidden, model_params[2])
                print("loading", modelfile)
                model = NLayerModel([nhidden] * (numlayer - 1), modelfile)
        elif dataset == "cifar":
            self.batch_size = 1024
            if model_name == "2-layer":
                model = TwoLayerCIFARModel("models/cifar_2layer", self.sess,
                                           not output_logits)
            elif model_name == "normal":
                if activation == "relu":
                    model = CIFARModel("models/cifar", self.sess,
                                       not output_logits)
                else:
                    model = CIFARModel("models/cifar_cnn_7layer_" + activation,
                                       self.sess,
                                       not output_logits,
                                       activation=activation)
            elif model_name == "brelu":
                model = CIFARModel("models/cifar_brelu", self.sess,
                                   not output_logits, use_brelu=True)
            elif model_name == "distilled":
                model = CIFARModel("models/cifar-distilled-100", self.sess,
                                   not output_logits)
            else:
                # specify model parameters as N,M,opts
                model_params = model_name.split(",")
                if len(model_params) < 3:
                    raise (RuntimeError("incorrect model option" + model_name))
                numlayer = int(model_params[0])
                nhidden = int(model_params[1])
                modelfile = "models/cifar_{}layer_relu_{}_{}".format(
                    numlayer, nhidden, model_params[2])
                print("loading", modelfile)
                model = NLayerModel([nhidden] * (numlayer - 1),
                                    modelfile,
                                    image_size=32,
                                    image_channel=3)
        elif dataset == "imagenet":
            # ImageNet models are much larger, so use a smaller batch
            self.batch_size = 32
            model = ImageNetModel(self.sess,
                                  use_softmax=not output_logits,
                                  model_name=model_name,
                                  create_prediction=False)
        else:
            raise (RuntimeError("dataset unknown"))
    #print("*** Loaded model successfully")
    self.model = model
    self.compute_slope = compute_slope
    # explicit batch_size argument wins over the per-dataset defaults above
    if batch_size != 0:
        self.batch_size = batch_size
    ## placeholders: self.img, self.true_label, self.target_label
    # img is the placeholder for image input
    self.img = tf.placeholder(shape=[
        None, model.image_size, model.image_size, model.num_channels
    ],
                              dtype=tf.float32)
    # output is the output tensor of the entire network
    self.output = model.predict(self.img)
    # create the graph to compute gradient
    # get the desired true label and target label
    self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
    self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
    true_output = self.output[:, self.true_label]
    target_output = self.output[:, self.target_label]
    # get the difference (the margin between true and target class scores)
    self.objective = true_output - target_output
    # get the gradient(deprecated arguments)
    self.grad_op = tf.gradients(self.objective, self.img)[0]
    # compute gradient norm: (in computation graph, so is faster)
    grad_op_rs = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
    self.grad_2_norm_op = tf.norm(grad_op_rs, axis=1)
    self.grad_1_norm_op = tf.norm(grad_op_rs, ord=1, axis=1)
    self.grad_inf_norm_op = tf.norm(grad_op_rs, ord=np.inf, axis=1)

    ### Lily: added Hessian-vector product calculation here for 2nd order bound:
    if order == 2:
        ## _hessian_vector_product(ys, xs, v): return a list of tensors containing the product between the Hessian and v
        ## ys: a scalar valur or a tensor or a list of tensors to be summed to yield of scalar
        ## xs: a list of tensors that we should construct the Hessian over
        ## v: a list of tensors with the same shape as xs that we want to multiply by the Hessian
        # self.randv: shape = (Nimg,28,28,1) (the v in _hessian_vector_product)
        self.randv = tf.placeholder(shape=[
            None, model.image_size, model.image_size, model.num_channels
        ],
                                    dtype=tf.float32)
        # hv_op_tmp: shape = (Nimg,28,28,1) for mnist, same as self.img (the xs in _hessian_vector_product)
        hv_op_tmp = gradients_impl._hessian_vector_product(
            self.objective, [self.img], [self.randv])[0]
        # hv_op_rs: reshape hv_op_tmp to hv_op_rs whose shape = (Nimg, 784) for mnist
        hv_op_rs = tf.reshape(hv_op_tmp, (tf.shape(hv_op_tmp)[0], -1))
        # self.hv_norm_op: norm of hessian vector product, keep shape = (Nimg,1) using keepdims
        self.hv_norm_op = tf.norm(hv_op_rs, axis=1, keepdims=True)
        # hv_op_rs_normalize: normalize Hv to Hv/||Hv||, shape = (Nimg, 784)
        hv_op_rs_normalize = hv_op_rs / self.hv_norm_op
        # self.hv_op: reshape hv_op_rs_normalize to shape = (Nimg,28,28,1)
        self.hv_op = tf.reshape(hv_op_rs_normalize, tf.shape(hv_op_tmp))

        ## reshape randv and compute its norm
        # shape: (Nimg, 784)
        randv_rs = tf.reshape(self.randv, (tf.shape(self.randv)[0], -1))
        # shape: (Nimg,)
        self.randv_norm_op = tf.norm(randv_rs, axis=1)
        ## compute v'Hv: use un-normalized Hv (hv_op_tmp, hv_op_rs)
        # element-wise multiplication and then sum over axis = 1 (now shape: (Nimg,))
        self.vhv_op = tf.reduce_sum(tf.multiply(randv_rs, hv_op_rs), axis=1)
        ## compute Rayleigh quotient: v'Hv/v'v (estimated largest eigenvalue), shape: (Nimg,)
        # note: self.vhv_op and self.randv_norm_op has to be in the same dimension (either (Nimg,) or (Nimg,1))
        self.eig_est = self.vhv_op / tf.square(self.randv_norm_op)

        ## Lily added the tf.while to compute the eigenvalue in computational graph later
        # cond for computing largest abs/neg eigen-value
        def cond(it, randv, eig_est, eig_est_prev, tfconst):
            # stop after 500 power iterations or once the estimate converges
            norm_diff = tf.norm(eig_est - eig_est_prev, axis=0)
            return tf.logical_and(it < 500, norm_diff > 0.001)

        # compute largest abs eigenvalue: tfconst = 0
        # compute largest neg eigenvalue: tfconst = 10
        def body(it, randv, eig_est, eig_est_prev, tfconst):
            # one power-iteration step on the (shifted) Hessian
            #hv_op_tmp = gradients_impl._hessian_vector_product(self.objective, [self.img], [randv])[0]-10*randv
            hv_op_tmp = gradients_impl._hessian_vector_product(
                self.objective, [self.img], [randv])[0] - tf.multiply(
                    tfconst, randv)
            hv_op_rs = tf.reshape(hv_op_tmp, (tf.shape(hv_op_tmp)[0], -1))
            hv_norm_op = tf.norm(hv_op_rs, axis=1, keepdims=True)
            hv_op_rs_normalize = hv_op_rs / hv_norm_op
            hv_op = tf.reshape(hv_op_rs_normalize, tf.shape(hv_op_tmp))
            randv_rs = tf.reshape(randv, (tf.shape(randv)[0], -1))
            randv_norm_op = tf.norm(randv_rs, axis=1)
            vhv_op = tf.reduce_sum(tf.multiply(randv_rs, hv_op_rs), axis=1)
            eig_est_prev = eig_est
            eig_est = vhv_op / tf.square(randv_norm_op)
            return (it + 1, hv_op, eig_est, eig_est_prev, tfconst)

        it = tf.constant(0)
        # compute largest abs eigenvalue
        result = tf.while_loop(
            cond, body,
            [it, self.randv, self.vhv_op, self.eig_est,
             tf.constant(0.0)])
        # compute largest neg eigenvalue
        self.shiftconst = tf.placeholder(shape=(), dtype=tf.float32)
        result_1 = tf.while_loop(
            cond, body,
            [it, self.randv, self.vhv_op, self.eig_est, self.shiftconst])
        # computing largest abs eig value and save result
        self.it = result[0]
        self.while_hv_op = result[1]
        self.while_eig = result[2]
        # computing largest neg eig value and save result
        self.it_1 = result_1[0]
        #self.while_eig_1 = tf.add(result_1[2], tfconst)
        # undo the spectral shift so the reported eigenvalue is of H itself
        self.while_eig_1 = tf.add(result_1[2], result_1[4])

        # debug-only dump of the constructed tensors
        show_tensor_op = False
        if show_tensor_op:
            print("====================")
            print("Define hessian_vector_product operator: ")
            print("hv_op_tmp = {}".format(hv_op_tmp))
            print("hv_op_rs = {}".format(hv_op_rs))
            print("self.hv_norm_op = {}".format(self.hv_norm_op))
            print("hv_op_rs_normalize = {}".format(hv_op_rs_normalize))
            print("self.hv_op = {}".format(self.hv_op))
            print("self.grad_op = {}".format(self.grad_op))
            print("randv_rs = {}".format(randv_rs))
            print("self.randv_norm_op = {}".format(self.randv_norm_op))
            print("self.vhv_op = {}".format(self.vhv_op))
            print("self.eig_est = {}".format(self.eig_est))
            print("====================")
    return self.img, self.output
def main(args):
    """Generate adversarial examples for the selected dataset/attack and
    report best-/average-/worst-case distortion statistics (L1, L2, Linf)
    plus success probability, optionally across a second (transfer) model.

    args: dict of CLI options -- dataset, attack, numimg, targetnum,
        untargeted, batch_size, maxiter, conf, init_const, binary_steps,
        beta, eps, abort_early, adversarial, temp, seed(_imagenet), train,
        restore_np, save_np, show, save, targetmodel.
    """
    with tf.Session() as sess:
        # --- model/data selection ---------------------------------------
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                # adversarially (re)trained variant
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                # defensively distilled variant at temperature args['temp']
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])

        timestart = time.time()
        # --- obtain adversarial examples: reload from .npy or attack ----
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
        timeend = time.time()

        # untargeted attacks have exactly one "target" per image
        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)

        # NOTE(review): the underscore-suffixed accumulators below are never
        # appended to within this function -- possibly leftover from an
        # earlier multi-model aggregation; verify before removing.
        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        #Transferability Tests
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        # evaluate the adversarial examples against every model
        # (index 0 = source model, last index = transfer target model)
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            # one group of num_targets consecutive rows per original image
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                # --- best case: smallest distortion among successes -----
                # 1e10 doubles as "no success found" sentinel
                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                # --- average case: one randomly chosen target -----------
                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
                else:
                    r_average.append(0)

                # --- worst case: all targets must succeed; record the
                # largest distortion among them ---------------------------
                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        # one failed target makes the whole group a miss
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                # dump originals and adversarials as PNGs (last model only)
                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))
                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))

            # --- report: "Src_" = source model(s), "Tgt_" = last model ---
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
def run(args, restrict=True):
    """Run targeted SPSA attacks against the defended model and record, for
    each test image, the defended model's prediction on each attempted
    target class.

    args: sequence [dataset, epsilon, mode, K, bias] -- dataset name
        ("MNIST"/"CIFAR"), attack budget, representation mode, K, and the
        defense bias term.
    restrict: when True, pin this subprocess to one GPU chosen from the
        multiprocessing worker index.

    Results (false positives, labels, undefended predictions, adversarial
    predictions) are pickled to `fname` incrementally so long runs can be
    inspected/resumed, then `analysis(fname)` is invoked.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented replacement.
        worker_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    bias = float(args[4])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(
        K) + "_" + str(bias)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1)  # Having this before keras.clear_session() causes it to hang

    # Load Model/Data and setup SPSA placeholders
    N = 1000
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA attacks a single image at a time
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    #print("Bounds ", np.max(np.abs(x_train)))
    # label -1 = real, +1 = adversarial
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K, bias=bias)
    defended_logits = model_defended.get_logits(x)

    # Get the predictions on the original images
    labels = np.argmax(data.test_labels[:N], axis=1)
    logits_real = sess.run(defended_logits, {x: data.test_data[:N]})
    # class index 10 is the defense's "reject/adversarial" output
    fp = (np.argmax(logits_real, axis=1) == 10)  # False positives
    pred_undefended = np.argmax(np.delete(logits_real, -1, axis=1),
                                axis=1)  # Original model prediction

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y_target=y_spsa,
                              epsilon=epsilon,
                              is_targeted=True,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    def _checkpoint(pred_adv):
        # Persist current progress; the context manager guarantees the file
        # handle is closed even if pickling fails (the original left the
        # handle open on error).
        out = {}
        out["FP"] = fp
        out["Labels"] = labels
        out["UndefendedPrediction"] = pred_undefended
        out["AdversarialPredictions"] = pred_adv
        with open(fname, "wb") as file:
            pickle.dump(out, file)

    # Run the attack; -1 marks "not attempted / not yet attacked"
    pred_adv = -1.0 * np.ones((N, 10))
    for i in range(N):
        if i % 10 == 0:
            print(fname, " ", i)
            _checkpoint(pred_adv)
        x_real = data.test_data[i].reshape(shape_spsa)
        # Try a targeted attack for each class other than the original
        # network prediction and the adversarial class
        for y in range(10):
            if y != pred_undefended[i]:
                x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y})
                pred_adv[i, y] = np.argmax(
                    sess.run(defended_logits, {x: x_adv}))

    # final dump of the completed results, then summarize
    _checkpoint(pred_adv)
    analysis(fname)
inputs.append(data.test_data[start + i]) targets.append(data.test_labels[start + i]) inputs = np.array(inputs) targets = np.array(targets) return inputs, targets if __name__ == "__main__": with tf.Session() as sess: data = CIFAR("ORI") Model = CIFARModel(restore="Models/CIFAR10_End2End_Trainable", end2end=True) attack = CarliniL2(sess, Model, batch_size=9, max_iterations=1000, confidence=0) inputs, targets = generate_data(data, samples=1, targeted=True, start=0, inception=False) timestart = time.time() adv = attack.attack(inputs, targets)
def run(args, restrict=True):
    """Measure how often the defense's false positives can be "recovered"
    by projection, and how often the recovered prediction is correct.

    args: sequence [dataset, epsilon, mode, K] -- dataset name
        ("MNIST"/"CIFAR"), attack budget, representation mode, and K.
    restrict: when True, pin this subprocess to one GPU chosen from the
        multiprocessing worker index.

    Prints the false-positive count and, when any exist, the fraction
    recovered in bounds and the recovery accuracy.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented replacement.
        worker_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1)  # Having this before keras.clear_session() causes it to hang

    # Load Model/Data and setup SPSA placeholders
    N = 500
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # label -1 = real, +1 = adversarial
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    defense = DefendedModel(base_model, x_train, y_train, K)
    get_votes = defense.get_votes(
        x)  # Should this be get_votes, introducing separate method
    get_logits = defense.get_logits(x)

    # Configure the attack
    attack = SPSA(defense, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=0.01,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-0.05)

    # Run the test
    sample = np.random.choice(data.test_data.shape[0], N, replace=False)
    x_sample = data.test_data[sample]
    y_sample = np.argmax(data.test_labels[sample], axis=1)
    votes = sess.run(get_votes, {x: x_sample})

    count = 0    # images the defense flagged (votes > 0)
    bound = 0    # flagged images whose projection lands on a real class
    correct = 0  # projections matching the true label
    for i in range(N):
        if votes[i, 0] > 0:
            count += 1
            # Project via an adversarially attack on the votest
            #x_real = x_sample[i].reshape(shape_spsa)
            #x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: 0})
            #TODO: not adv, is projected
            # FIX: feed a rank-4 batch of one (x_sample[i] is rank-3 and is
            # rejected by the (None, H, W, C) placeholder).
            x_proj = sess.run(get_logits, {x: x_sample[i:i + 1]})
            projection_labels = np.argmax(x_proj, axis=1)
            # FIX: keep every non-reject class. The previous mask
            # `np.nonzero(projection_labels * (projection_labels != 10))`
            # also discarded class 0, because 0 * anything is falsy.
            successful_projections = projection_labels[
                projection_labels != 10]

            # Check if the projection was a success
            if successful_projections.shape[0] != 0:
                bound += 1
                # Check if the projection is predicted correctly.
                # NOTE(review): x_proj holds logits, yet it is fed back into
                # the image placeholder here -- this looks wrong (shape
                # mismatch) unless get_logits actually returns a projected
                # input; confirm against DefendedModel before relying on it.
                if y_sample[i] == np.argmax(
                        sess.run(get_logits, {x: x_proj}), axis=1)[0]:
                    correct += 1

    print("FP Count: ", count)
    # guard against ZeroDivisionError when the defense flags nothing
    if count:
        print("FP Recovery in Bounds: ", bound / count)
        print("FP Recovery Accuracy: ", correct / count)
image_dim = 299 image_channels = 3 num_labels = 1001 model = InceptionModel(sess, use_log=True) elif FLAGS.model == 'mnist': dataset = MNIST() model = MNISTModel('models/mnist', sess, use_log=True) image_dim = 28 image_channels = 1 num_labels = 10 inputs, targets, reals = utils.generate_data( dataset, FLAGS.test_size) assert FLAGS.resize_dim is None, 'Dimensionality reduction of noise is used only for ImageNet models' elif FLAGS.model == 'cifar': dataset = CIFAR() model = CIFARModel('models/cifar', sess, use_log=True) image_dim = 32 image_channels = 3 num_labels = 10 inputs, targets, reals = utils.generate_data( dataset, FLAGS.test_size) assert FLAGS.resize_dim is None, 'Dimensionality reduction of noise is used only for ImageNet models' else: raise ValueError('Incorrect model name provided ({})'.format( FLAGS.model)) test_in = tf.placeholder(tf.float32, (1, image_dim, image_dim, image_channels), 'x') test_pred = tf.argmax(model.predict(test_in), axis=1) attack = GenAttack2(model=model,
targets = np.array(targets) return inputs, targets if __name__ == "__main__": with tf.Session() as sess: #data, model = MNIST(), Classifier(sess) data = CIFAR10() # target model if sys.argv[1] == 'our': model = Classifier(input_shape=data.IMG_SHAPE, session=sess) model.restore('../Clf/models/cifar_classifier') elif sys.argv[1] == 'orgONLY': model = CIFARModel('models/cifar', sess) elif sys.argv[1] == 'orgDIS': model = CIFARModel('models/cifar-distilled-100', sess) else: print('Wrong Parameters') sys.exit() # init attack attack = CarliniL2(sess, model, targeted=False, max_iterations=1000, confidence=10, boxmin=0, boxmax=1) #inputs, targets = generate_data(data, samples=128, targeted=False, start=0, inception=False) inputs = data.X_test[:128] targets = data.y_test[:128] timestart = time.time() adv = attack.attack(inputs, targets)
def test_cw():
    """Exploratory scratch driver for the Carlini-Wagner L2/L0/Linf attacks.

    NOTE(review): this reads like a REPL transcript pasted into a function --
    many expressions below compute a value and discard it, `data`/`model`
    and `inputs`/`targets` are assigned twice (only the second assignment
    survives), and `keras` is referenced without a visible import. Kept
    verbatim; treat as a manual experiment, not an automated test.
    """
    sess = tf.Session()
    # sess.run(tf.global_variables_initializer())
    # keras maintains a tf session. It must be set by either
    # keras.backend.set_session(sess), or use inside a context manager
    # sess.as_default()
    with sess.as_default():
        data, model = MNIST(), MNISTModel("models/mnist", sess)
    # overwrites the MNIST pair above -- CIFAR is what actually gets attacked
    with sess.as_default():
        data, model = CIFAR(), CIFARModel("models/cifar", sess)
    # testing the model (first result is computed and discarded)
    np.argmax(model.model.predict(data.test_data[:10]), axis=1)
    print(np.argmax(data.test_labels[:10], axis=1))
    #data, model = CIFAR(), CIFARModel("models/cifar", sess)
    attack_l2 = CarliniL2(sess,
                          model,
                          batch_size=10,
                          max_iterations=1000,
                          confidence=0)
    attack_l0 = CarliniL0(sess,
                          model,
                          max_iterations=1000,
                          initial_const=10,
                          largest_const=15)
    attack_li = CarliniLi(sess, model)
    inputs, targets = generate_data(data,
                                    samples=1,
                                    targeted=True,
                                    start=0,
                                    inception=False)
    # TODO find the first digits of each kind, try map it to the next digit
    # (this second call replaces the inputs/targets generated just above)
    inputs, targets = generate_data_2(data)
    adv_l2 = attack_l2.attack(inputs, targets)
    adv_l0 = attack_l0.attack(inputs, targets)
    adv_li = attack_li.attack(inputs, targets)
    plt.tight_layout()
    plt.tight_layout(pad=1, w_pad=1, h_pad=1)
    grid_show_image(inputs, 10, 1, 'images/orig-mnist.png')
    grid_show_image(adv_l2, 10, 1, 'images/l2.png')
    grid_show_image(adv_l0, 10, 1, 'images/l0.png')
    grid_show_image(adv_li, 9, 2, 'images/li.png')
    # leftover import experiment; the bare name below is a no-op expression
    from contextlib import redirect_stdout
    redirect_stdout
    # the remaining expressions inspect predictions/distances and discard
    # the results (REPL leftovers)
    np.sum((adv_l2[0] - inputs[0])**2)
    # np.argmax(targets, axis=1)
    # import keras
    # keras.backend.set_session(sess)
    np.argmax(model.model.predict(inputs), axis=1)
    np.argmax(targets, axis=1)
    # # (((adv_l2 + 0.5)*255).round())
    np.argmax(model.model.predict(adv_l2), axis=1)
    np.argmax(model.model.predict(adv_l0), axis=1)
    np.argmax(model.model.predict(adv_li), axis=1)
    np.sum(model.model.predict(adv_l2), axis=1)
    np.sum(sess.run(tf.nn.softmax(model.model.predict(adv_l2))), axis=1)
    softmax_pred = sess.run(tf.nn.softmax(model.model.predict(adv_l2)))
    softmax_pred[0]
    np.argmax(softmax_pred, axis=1)
    # NOTE(review): `keras` has no visible import here (it is commented out
    # above); this line would raise NameError unless keras is imported at
    # module level outside this view -- confirm.
    keras.activations.softmax(model.model)
    model.model.predict(((adv_l2 + 0.5) * 255).round())
def main(args):
    """Run one of the CW/EAD/ADMM/FGM attack variants against the selected
    dataset/model and hand the results to `l1_l2_li_computation` for
    distortion statistics.

    args: dict of CLI options -- dataset, attack, numimg, target_number,
        batch_size, maxiter, conf, binary_steps, iteration_steps, ro, beta,
        retrain, abort_early, adversarial, temp, seed(_imagenet), train.

    NOTE(review): if args['attack'] matches none of the branches below,
    `attack` is never bound and attack.attack(...) raises NameError.
    """
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        # --- dataset / base model selection -----------------------------
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MadryMNISTModel("models/secret/", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            #data, model = CIFAR(), MadryCIFARModel("models/model_0/", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(
                sess, False)
            handpick = True
            inception = True

        # --- optional model overrides (adversarial / distilled) ---------
        # NOTE(review): this branch loads an MNIST model regardless of
        # args['dataset'] -- confirm that 'adversarial' is only used with
        # mnist.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = MadryCIFARModel(
                "models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            target_num=args['target_number'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])
        #print(true_ids)

        # --- attack selection (exactly one branch should match) ---------
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'LiCW':
            attack = CarliniLi(sess,
                               model,
                               max_iterations=args['maxiter'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L2A':
            attack = ADMML2(sess,
                            model,
                            batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])
        if args['attack'] == 'L2AE':
            attack = ADMML2en(sess,
                              model,
                              batch_size=args['batch_size'],
                              max_iterations=args['maxiter'],
                              confidence=args['conf'],
                              binary_search_steps=args['binary_steps'],
                              ro=args['ro'],
                              iteration_steps=args['iteration_steps'],
                              abort_early=args['abort_early'])
        if args['attack'] == 'L2LA':
            attack = LADMML2(sess,
                             model,
                             batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'],
                             abort_early=args['abort_early'])
        if args['attack'] == 'L2LAST':
            attack = LADMMSTL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'],
                               abort_early=args['abort_early'],
                               retrain=args['retrain'])
        if args['attack'] == 'LiIF':
            attack = IFGM(sess,
                          model,
                          batch_size=args['batch_size'],
                          ord=np.inf,
                          inception=inception)
        if args['attack'] == 'LiF':
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
        if args['attack'] == 'L1':
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
        if args['attack'] == 'L1EN':
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
        if args['attack'] == 'L1IFGM':
            attack = IFGM(sess,
                          model,
                          batch_size=args['batch_size'],
                          ord=1,
                          inception=inception)
        if args['attack'] == 'L2IFGM':
            attack = IFGM(sess,
                          model,
                          batch_size=args['batch_size'],
                          ord=2,
                          inception=inception)
        if args['attack'] == 'L1FGM':
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
        if args['attack'] == 'L2FGM':
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)

        # --- run the attack and report ----------------------------------
        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")
        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
        #if (args['conf'] != 0):
        #    model = MNISTModel("models/mnist-distilled-100", sess)
        l1_l2_li_computation(args, data, model, adv, inception, inputs,
                             targets, labels, true_ids)
def main(args):
    """Run a C&W-style L2 attack (white-box CarliniL2 or black-box BlackBoxL2)
    over a set of test images, saving adversarial images and per-image stats.

    args: dict of command-line options (dataset, attack, maxiter, lr, ...).
    Side effects: creates ``{save}/{dataset}`` directories, writes PNGs via
    ``show``, and prints per-image statistics to stdout.
    """
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model = MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(), InceptionModel(sess, use_log)
            data, model = ImageNet(), ClarifaiModel(sess, use_log)
        print('Done...')
        # numimg == 0 means "attack everything after firstimg"
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess, model, batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0, learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            # switched batch size to 45 here (comment above is historical)
            attack = BlackBoxL2(sess, model, batch_size=45,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0, learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])
        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids = generate_data(
            data, samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'], inception=is_inception)
        #print(type(all_inputs))
        #print(type(all_targets))
        #print(type(all_labels))
        #print(type(all_true_ids))
        #print(all_targets[0])
        #print(all_labels[0])
        #print(type(all_true_ids[0]))
        print('Done...')
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))
        img_no = 0
        total_success = 0
        l2_total = 0.0
        # attack one image at a time
        for i in range(all_true_ids.size):
            inputs = all_inputs[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            #print("true labels:", np.argmax(labels), labels)
            #print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(inputs)
            print(original_predict)
            original_predict = np.squeeze(original_predict)
            # sorted probabilities / classes: [-1] is the most likely class
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):", original_class[:6])
            # skipping of misclassified images is disabled here:
            #if original_class[-1] != np.argmax(labels):
            #    print("skip wrongly classified image no. {}, original class {}, classified as {}".format(i, np.argmax(labels), original_class[-1]))
            #    continue
            img_no += 1
            timestart = time.time()
            print(inputs.shape)
            print("shape target", targets.shape)
            print(targets)
            adv, const = attack.attack_batch(inputs, targets, img_no)
            # some attacks return the const as a one-element list
            if type(const) is list:
                const = const[0]
            # restore the batch dimension if the attack squeezed it
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5
            adversarial_predict = model.model.predict(adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])
            success = False
            if args['untargeted']:
                # untargeted: success means the top class changed
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                # targeted: success means the top class is the target
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            # reject adversarial examples with excessive L2 distortion
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs, "{}/{}/{}_original_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            show(
                adv, "{}/{}/{}_adversarial_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            show(
                adv - inputs, "{}/{}/{}_diff_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()
def main(args):
    """Run an L2 attack (CarliniL2 or LADMML2re) and evaluate the resulting
    perturbations against both the modified and the original model.

    args: dict of command-line options (dataset, attack, conf, kernel_bias, ...).
    Side effects: prints success-rate statistics to stdout.
    """
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        # optionally swap in an adversarially-trained or distilled model
        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)
        inputs, targets, labels, true_ids = generate_data_ST(
            data, model, samples=args['numimg'], samplesT=args['numimgT'],
            targeted=True, start=0, inception=inception,
            handpick=handpick, seed=args['seed'])
        #print(true_ids)
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'L2LA2':
            attack = LADMML2re(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               layernum=args['layer_number'],
                               use_kernel=args['use_kernel'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'],
                               abort_early=args['abort_early'])
        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")
        # transfer evaluation: nonzero confidence implies a distilled model
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)
        if args['kernel_bias']:
            # evaluate with perturbed kernel+bias, then restore
            EP = evaluate_perturbation_kb(args, sess, model, inputs)
            scores, l2 = EP(inputs, targets, adv)
            EPT = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores = EPT(data.test_data, data.test_labels)
            EP2 = evaluate_perturbation_kb_restore(args, sess, model, inputs)
            scores2 = EP2(inputs, targets, adv)
            EPT2 = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores2 = EPT2(data.test_data, data.test_labels)
        else:
            # NOTE(review): this branch leaves `scores`, `scores2`,
            # `test_scores`, `test_scores2` and `l2` undefined, so the
            # scoring loops below raise NameError when kernel_bias is
            # false — confirm whether this path is ever exercised.
            EP = evaluate_perturbation(args, sess, model, inputs)
            # scores = EP(inputs, targets, adv)
            # scores2 = EP2(inputs, targets, adv)
        score_count = []
        score_count2 = []
        score_count3 = []
        score_count4 = []
        # modified model: hits on the target labels (score_count),
        # restricted to the first numimg source images (score_count4)
        for e, (sc) in enumerate(scores):
            if np.argmax(sc) == np.argmax(targets[e]):
                score_count.append(1)
                if e < args['numimg']:
                    score_count4.append(1)
            else:
                score_count.append(0)
                if e < args['numimg']:
                    score_count4.append(0)
        # modified model: hits on the original labels
        for e, (sc) in enumerate(scores):
            if np.argmax(sc) == np.argmax(labels[e]):
                score_count3.append(1)
            else:
                score_count3.append(0)
        # restored (original) model: hits on the original labels
        for e, (sc2) in enumerate(scores2):
            if np.argmax(sc2) == np.argmax(labels[e]):
                score_count2.append(1)
            else:
                score_count2.append(0)
        test_score_count = []
        test_score_count2 = []
        # test-set accuracy under the modified model
        for e, (tsc) in enumerate(test_scores):
            if np.argmax(tsc) == np.argmax(data.test_labels[e]):
                test_score_count.append(1)
            else:
                test_score_count.append(0)
        # test-set accuracy under the restored model
        for e, (tsc2) in enumerate(test_scores2):
            if np.argmax(tsc2) == np.argmax(data.test_labels[e]):
                test_score_count2.append(1)
            else:
                test_score_count2.append(0)
        l0s = np.count_nonzero(adv)
        successrate = np.mean(score_count)
        successrate2 = np.mean(score_count2)
        successrate3 = np.mean(score_count3)
        test_successrate = np.mean(test_score_count)
        test_successrate2 = np.mean(test_score_count2)
        print('original model, success rate of T images for the original labels:', successrate2)
        print('modified model, success rate of T images for the original labels:', successrate3)
        print('modified model, success rate of T images for the target labels:', successrate)
        print('modified model, success rate of S imges for the target labels:', np.mean(score_count4))
        print('modified model, success rate of test set for the original labels:', test_successrate)
        print('original model, success rate of test set for the original labels:', test_successrate2)
        print('l0 distance:', l0s)
        print('l2 distance:', l2)
def main(args):
    """Drive a black-box attack (ZOO / ZOO_AE / AutoZOOM_BiLIN / AutoZOOM_AE)
    over a generated attack set and save original/adversarial/diff images.

    args: dict of command-line options (dataset, attack_method, attack_type,
        num_img, batch_size, img_resize, codec_prefix, save_path, ...).
    Side effects: creates save directories, writes .png/.npy files, prints
    per-image progress to stdout.
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_softmax=True)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(data_path=args["imagenet_dir"], targetFile=args["attack_single_img"]), InceptionModel(sess, use_softmax=True)
            data, model = ImageNetDataNP(), InceptionModel(sess, use_softmax=True)
        # elif args['dataset'] == "imagenet_np":
        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))
        if args["attack_single_img"]:
            # manually setup attack set
            # attacking only one image with random attack]
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])
            if args["attack_type"] == "targeted":
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels
        else:
            # generate attack set
            # imagenet label indices are shifted by one relative to the model
            if args["dataset"] == "imagenet" or args[
                    "dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False
            if args["random_target"] and (args["dataset"] == "imagenet" or
                                          args["dataset"] == "imagenet_np"):
                # find all possible class
                all_class = np.unique(np.argmax(data.test_labels, 1))
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data, args["num_img"], args["img_offset"], model,
                    attack_type=args["attack_type"],
                    random_target_class=all_class,
                    shift_index=shift_index)
            elif args["random_target"]:
                # random target on all possible classes
                class_num = data.test_labels.shape[1]
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data, args["num_img"], args["img_offset"], model,
                    attack_type=args["attack_type"],
                    random_target_class=list(range(class_num)),
                    shift_index=shift_index)
            else:
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data, args["num_img"], args["img_offset"], model,
                    attack_type=args["attack_type"],
                    shift_index=shift_index)
        # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(i, tar_lab, orig_lab, orig_img_id[i]))
        # attack related settings
        if args["attack_method"] == "zoo" or args[
                "attack_method"] == "autozoom_bilin":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))
        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))
        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
            args["batch_size"] = 1  # force to be 1
        if args["attack_method"] == "zoo_ae" or args[
                "attack_method"] == "autozoom_ae":
            # load the autoencoder used to compress the attack space
            #_, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size, model.num_channels,
                              args["compress_mode"], use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))
        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)
        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])
        os.system("mkdir -p {}".format(save_prefix))
        total_success = 0
        l2_total = 0
        # attack one image at a time
        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]
            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]
            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))
            if args["attack_method"] == "zoo_ae" or args[
                    "attack_method"] == "autozoom_ae":
                #print ae info
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # imagenet: rescale to the 128x128 codec input
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))
            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()
            # restore the batch dimension if the attack squeezed it
            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)
            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))
            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True
            if success:
                total_success += 1
                l2_total += l2_dist
            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))
            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)
            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)
            # diff image (halved so it stays in displayable range)
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
def inverse_func(x): return (x + 0.5) * 255 if __name__ == '__main__': path = os.getcwd() os.chdir('../../../') # avd_filename = data_folder + 'adversarial_labels_' + str(images_per_label) + '.pickle' # with open(avd_filename, 'rb') as handle: # n = to_categorical(pickle.load(handle), 1000) with K.tf.Session() as sess: dataset, model = CIFAR(), CIFARModel(path + '/models/cifar-distilled-80', sess, True).model model.trainable = False model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc']) len_test = len(dataset.test_data) p = np.arange(len_test, dtype=int) np.random.shuffle(p) p = p[:1000] data = dataset.test_data[p] label = dataset.test_labels[p] y_label = np.argmax(label, axis=1) r = model.predict(data[:2]) n = np.random.randint(10, size=(1000,)) while np.any(n == y_label):
def run(args, restrict=True):
    """Attack a KNN-defended model with SPSA in a worker subprocess.

    args: sequence of [dataset, epsilon, mode, K] — dataset is "MNIST" or
        "CIFAR", epsilon the attack budget, mode the representation name,
        K the defense's neighbor count.
    restrict: when True, pin this subprocess to a single GPU derived from
        the multiprocessing worker index.
    Side effects: writes natural/adversarial accuracy to "<fname>.txt".
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess
        id = np.int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(id - 1)
    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)
    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)
    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1
    )  # Having this before keras.clear_session() causes it to hang for some reason
    # Load Model/Data and setup SPSA placeholders
    N = 50  # number of test images to attack
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA attacks a single image at a time
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA attacks a single image at a time
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)
    # Load the hidden representations of the real and adversarial examples from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode + ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    #print("Bounds ", np.max(np.abs(x_train)))
    # labels: -1 for real representations, +1 for adversarial ones
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))
    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)
    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)
    # Run the attack
    f = open(fname + ".txt", "w")
    sample = np.random.choice(data.test_data.shape[0], N, replace=False)
    x_sample = data.test_data[sample]
    y_sample = np.argmax(data.test_labels[sample], axis=1)
    logits_nat = sess.run(defended_logits, {x: x_sample})
    f.write("Accuracy on Natural Images: " +
            str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")
    # attack each sampled image individually and record the defended
    # model's prediction on the adversarial version
    pred_adv = -1.0 * np.ones((N))
    for i in range(N):
        x_real = x_sample[i].reshape(shape_spsa)
        x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
        pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))
    f.write("Accuracy on Adversarial Images: " +
            str(np.mean(pred_adv == y_sample)))
    f.close()
def load_model(self, dataset="mnist", model_name="2-layer", model=None, batch_size=0, compute_slope=False):
    """
    Load (or adopt) a classifier and build the gradient-norm graph.

    model: if set to None, then load dataset with model_name. Otherwise use the model directly.
    dataset: mnist, cifar and imagenet. recommend to use mnist and cifar as a starting point.
    model_name: possible options are 2-layer, distilled, and normal
    batch_size: overrides the per-dataset default batch size when nonzero.
    compute_slope: stored on self; consumed elsewhere in this class.

    Returns (self.img, self.output): the input placeholder and the
    network's output tensor.
    """
    import tensorflow as tf
    from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
    from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
    from setup_imagenet import ImageNet, ImageNetModel
    # if set this to true, we will use the logit layer output instead of probability
    # the logit layer's gradients are usually larger and more stable
    output_logits = True
    self.dataset = dataset
    self.model_name = model_name
    if model is None:
        print('Loading model...')
        if dataset == "mnist":
            self.batch_size = 1024
            if model_name == "2-layer":
                model = TwoLayerMNISTModel("models/mnist_2layer", self.sess, not output_logits)
            elif model_name == "normal":
                model = MNISTModel("models/mnist", self.sess, not output_logits)
            elif model_name == "brelu":
                model = MNISTModel("models/mnist_brelu", self.sess, not output_logits, use_brelu=True)
            elif model_name == "distilled":
                model = MNISTModel("models/mnist-distilled-100", self.sess, not output_logits)
            else:
                raise (RuntimeError("incorrect model option"))
        elif dataset == "cifar":
            self.batch_size = 1024
            if model_name == "2-layer":
                model = TwoLayerCIFARModel("models/cifar_2layer", self.sess, not output_logits)
            elif model_name == "normal":
                model = CIFARModel("models/cifar", self.sess, not output_logits)
            elif model_name == "brelu":
                model = CIFARModel("models/cifar_brelu", self.sess, not output_logits, use_brelu=True)
            elif model_name == "distilled":
                model = CIFARModel("models/cifar-distilled-100", self.sess, not output_logits)
            else:
                raise (RuntimeError("incorrect model option"))
        elif dataset == "imagenet":
            self.batch_size = 32
            model = ImageNetModel(self.sess,
                                  use_softmax=not output_logits,
                                  model_name=model_name,
                                  create_prediction=False)
        else:
            raise (RuntimeError("dataset unknown"))
    self.model = model
    self.compute_slope = compute_slope
    if batch_size != 0:
        self.batch_size = batch_size
    # img is the placeholder for image input
    self.img = tf.placeholder(shape=[
        None, model.image_size, model.image_size, model.num_channels
    ], dtype=tf.float32)
    # output is the output tensor of the entire network
    self.output = model.predict(self.img)
    # create the graph to compute gradient
    # get the desired true label and target label
    self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
    self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
    true_output = self.output[:, self.true_label]
    target_output = self.output[:, self.target_label]
    # get the difference (margin between true and target outputs)
    self.objective = true_output - target_output
    # get the gradient of the margin w.r.t. the input image
    self.grad_op = tf.gradients(self.objective, self.img)[0]
    # compute gradient norm (L2, L1, Linf) per batch element
    grad_op_rs = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
    self.grad_2_norm_op = tf.norm(grad_op_rs, axis=1)
    self.grad_1_norm_op = tf.norm(grad_op_rs, ord=1, axis=1)
    self.grad_inf_norm_op = tf.norm(grad_op_rs, ord=np.inf, axis=1)
    return self.img, self.output
def main(args):
    """Run zeroth-order (gradient-free) attacks — ZOSGD, ZOsignSGD, ZOSCD,
    ZOAdaMM, ZOSMD, ZOPSGD, ZONES — against a classifier, recording loss
    traces and per-image results to .npz files under retperimage2/.

    args: dict of command-line options (dataset, mode, maxiter, q, mu, lr,
        constraint, targeted_attack, image_number, ...).
    """
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)
        # draw 3x as many candidate ids as needed, since misclassified
        # images are skipped below
        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)
        #image_id_set = np.random.randint(1, 1000, args["image_number"] )
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]
        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')
        # succ_count: successful attacks; ii: candidates consumed;
        # iii: images actually attacked
        succ_count, ii, iii = 0, 0, 0
        final_distortion_count, first_iteration_count, first_distortion_count = [], [], []
        # NOTE(review): ii is incremented before indexing, so
        # image_id_set[0] is never used — confirm this is intentional.
        while iii < args["image_number"]:
            ii = ii + 1
            image_id = image_id_set[ii]
            # if image_id!= 836: continue # for test only
            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))
            ## orig_class: predicted label;
            if arg_targeted_attack:  ### target attack
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class
            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]
            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]
            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                # only attack images the model already classifies correctly
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            else:
                iii = iii + 1
                print('\n', iii, '/', args["image_number"])
            ## parameter
            d = orig_img.size  # feature dim
            print("dimension = ", d)
            # mu=1/d**2  # smoothing parameter
            q = arg_q + 0
            I = arg_max_iter + 0
            kappa = arg_kappa + 0
            const = arg_init_const + 0
            ## flatten image to vec
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  ### initialized adv. perturbation
            #delta_adv = np.random.uniform(-16/255,16/255,(1,d))
            ## w adv image initialization
            if args["constraint"] == 'uncons':
                # * 0.999999 to avoid +-0.5 return +-infinity
                w_ori_img_vec = np.arctanh(
                    2 * (orig_img_vec) * 0.999999
                )  # in real value, note that orig_img_vec in [-0.5, 0.5]
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)
            # ## test ##
            # for test_value in w_ori_img_vec[0, :]:
            #     if np.isnan(test_value) or np.isinf(test_value):
            #         print(test_value)
            # initialize the best solution & best loss
            best_adv_img = []  # successful adv image in [-0.5, 0.5]
            best_delta = []  # best perturbation
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(I)  ## I: max iters
            l2s_loss_all = np.zeros(I)
            attack_flag = False
            first_flag = True  ## record first successful attack
            # parameter setting for ZO gradient estimation
            mu = args["mu"]  ### smoothing parameter
            ## learning rate
            base_lr = args["lr"]
            if arg_mode == "ZOAdaMM":
                ## parameter initialization for AdaMM
                v_init = 1e-7  #0.00001
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))
                m = np.zeros((1, d))
                # momentum parameter for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)
            #for i in tqdm(range(I)):
            for i in range(I):
                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)
                ## Total loss evaluation
                if args["constraint"] == 'uncons':
                    total_loss[i], l2s_loss_all[
                        i] = function_evaluation_uncons(
                            w_img_vec, kappa, target_label, const, model,
                            orig_img, arg_targeted_attack)
                else:
                    total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                        w_img_vec, kappa, target_label, const, model,
                        orig_img, arg_targeted_attack)
                ## gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    grad_est = grad_coord_estimation(mu, q, w_img_vec, d,
                                                     kappa, target_label,
                                                     const, model, orig_img,
                                                     arg_targeted_attack,
                                                     args["constraint"])
                elif arg_mode == "ZONES":
                    grad_est = gradient_estimation_NES(mu, q, w_img_vec, d,
                                                       kappa, target_label,
                                                       const, model, orig_img,
                                                       arg_targeted_attack,
                                                       args["constraint"])
                else:
                    grad_est = gradient_estimation_v2(mu, q, w_img_vec, d,
                                                      kappa, target_label,
                                                      const, model, orig_img,
                                                      arg_targeted_attack,
                                                      args["constraint"])
                # if np.remainder(i,50)==0:
                #     print("total loss:",total_loss[i])
                #     print(np.linalg.norm(grad_est, np.inf))
                ## ZO-Attack, unconstrained optimization formulation
                if arg_mode == "ZOSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if arg_mode == "ZOSCD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOAdaMM":
                    # AMSGrad-style update: keep the element-wise max of
                    # the second-moment estimate
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                    v_hat = np.maximum(v_hat, v)
                    #print(np.mean(v_hat))
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        tmp = delta_adv.copy()
                        #X_temp = orig_img_vec.reshape((-1,1))
                        #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                        #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                    # v_init = 1e-2 #0.00001
                    # v = v_init * np.ones((1, d))
                    # m = np.zeros((1, d))
                    # # momentum parameter for first and second order moment
                    # beta_1 = 0.9
                    # beta_2 = 0.99 # only used by AMSGrad
                    # m = beta_1 * m + (1-beta_1) * grad_est
                    # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                    # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                    # if args["constraint"] == 'cons':
                    #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                    #     X_temp = orig_img_vec.reshape((-1,1))
                    #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    # delta_adv = delta_adv - base_lr* grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZONES":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                # if arg_mode == "ZO-AdaFom":
                #     m = beta_1 * m + (1-beta_1) * grad_est
                #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
                #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
                ## ### adv. example update
                w_img_vec = w_ori_img_vec + delta_adv
                ## convert back to adv_img in [-0.5 , 0.5]
                if args["constraint"] == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
                else:
                    adv_img_vec = w_img_vec.copy()
                adv_img = np.resize(adv_img_vec, orig_img.shape)
                ## update the best solution in the iterations
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                # other_prob: highest probability among non-target classes
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)
                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))
                if arg_save_iteration:
                    os.system("mkdir Examples")
                    if (np.logical_or(
                            true_label != np.argmax(attack_prob),
                            np.remainder(i + 1,
                                         10) == 0)):  ## every 10 iterations
                        suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                            image_id, arg_mode, true_label,
                            np.argmax(attack_prob), i + 1)
                        # util.save_img(adv_img, "Examples/{}.png".format(suffix))
                if arg_targeted_attack:
                    # targeted: need log(target) - log(best other) >= kappa
                    if (np.log(target_prob + 1e-10) -
                            np.log(other_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False  ### once gets into this, it will no longer record the next successful attack
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
                else:
                    # untargeted: need log(best other) - log(target) >= kappa
                    if (np.log(other_prob + 1e-10) -
                            np.log(target_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
            if (attack_flag):
                # os.system("mkdir Results_SL")
                # ## best attack (final attack)
                # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
                # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, adv_class)
                # suffix3 = "id_{}_Mode_{}".format(image_id, arg_mode)
                # ### save original image
                # util.save_img(orig_img, "Results_SL/id_{}.png".format(image_id))
                # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
                # ### adv. image
                # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
                # ### adv. perturbation
                # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
                #
                # ## first attack
                # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, first_class)
                # ## first adv. image
                # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
                # ### first adv. perturbation
                # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))
                ## save data
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         best_iteation=best_iteration,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                ## print
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
                # print(total_loss)
            else:
                ## save data
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")
            sys.stdout.flush()
        # summary statistics over all attacked images
        print('succ rate:', succ_count / args["image_number"])
        print('average first success l2', np.mean(first_distortion_count))
        print('average first itrs', np.mean(first_iteration_count))
        print('average l2:', np.mean(final_distortion_count),
              ' best l2:', np.min(final_distortion_count),
              ' worst l2:', np.max(final_distortion_count))
def main(args):
    """Drive one black-box attack run (Carlini L2 or ZO-ADMM score-based).

    Loads the dataset/model pair named by args['dataset'], builds the attack
    object selected by args['attack'], generates a batch of (input, target)
    pairs, runs the attack, optionally saves the results, and finally hands
    everything to the distortion-computation helpers.

    args: dict of command-line options (dataset, attack, numimg, targeted,
          maxiter, conf, batch_size, ... -- parsed elsewhere in this file).
    """
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        # Dataset/model selection. handpick/inception steer how generate_data
        # picks candidate samples for the attack.
        # NOTE(review): if args['dataset'] matches none of these branches,
        # handpick/inception (and data/model) are never bound and the
        # generate_data call below raises NameError -- confirm callers
        # validate the dataset name.
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # NOTE(review): this override always loads an MNIST checkpoint, even
        # when args['dataset'] is cifar/imagenet -- looks MNIST-specific;
        # verify against how 'adversarial' is used upstream.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        # Optional defensively-distilled model variants.
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=args['targeted'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])
        #print(true_ids)

        # Attack construction; exactly one of these is expected to match.
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               targeted=args['targeted'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])

        if args['attack'] == 'L2BB':
            # score-based ZO-ADMM attack
            attack = LADMMBB(sess,
                             model,
                             batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             targeted=args['targeted'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'],
                             abort_early=args['abort_early'],
                             gama=args['gama'],
                             epi=args['epi'],
                             alpha=args['alpha'])

        timestart = time.time()
        # adv = attack.attack(inputs, targets)
        adv, querycount, queryl2 = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # NOTE(review): evaluating with a distilled MNIST model whenever a
        # nonzero confidence was requested -- this replaces the attacked model
        # for the metrics below regardless of dataset; confirm intent.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        # Distortion metrics (L1/L2/Linf for targeted, L2 otherwise).
        if args['targeted']:
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids, querycount,
                                 queryl2)
        else:
            l2_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids, querycount, queryl2)
## ## This program is licenced under the BSD 2-Clause licence, ## contained in the LICENCE file in this directory. from setup_cifar import CIFAR, CIFARModel from setup_mnist import MNIST, MNISTModel from setup_inception import ImageNet, InceptionModel import tensorflow as tf import numpy as np BATCH_SIZE = 1 with tf.Session() as sess: data, model = MNIST(), MNISTModel("models/mnist", sess) data, model = CIFAR(), CIFARModel("models/cifar", sess) data, model = ImageNet(), InceptionModel(sess) x = tf.placeholder( tf.float32, (None, model.image_size, model.image_size, model.num_channels)) y = model.predict(x) r = [] for i in range(0, len(data.test_data), BATCH_SIZE): pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]}) #print(pred) #print('real',data.test_labels[i],'pred',np.argmax(pred)) r.append( np.argmax(pred, 1) == np.argmax(data.test_labels[i:i + BATCH_SIZE], 1))
# --- Evaluate a pretrained classifier on clean vs. adversarial data. ---
# Relies on `config`, `dataset`, `mode`, `set_session`, `Keras`, and the
# model/data classes defined earlier in this script.
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))
sess = Keras.get_session()
Keras.set_learning_phase(False)

# Fix seeds so the evaluation is reproducible.
np.random.seed(1)
tf.set_random_seed(1)

if dataset == "MNIST":
    data = MNIST()
    model = MNISTModel("../1-Models/MNIST")
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
elif dataset == "CIFAR":
    data = CIFAR()
    model = CIFARModel("../1-Models/CIFAR")
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))

# Top-1 accuracy on the clean train/test splits.
training_accuracy = np.mean(
    np.argmax(model.model.predict(data.train_data), axis=1) == np.argmax(
        data.train_labels, axis=1))
print("Training Accuracy: " + str(training_accuracy))
testing_accuracy = np.mean(
    np.argmax(model.model.predict(data.test_data), axis=1) == np.argmax(
        data.test_labels, axis=1))
print("Testing Accuracy: " + str(testing_accuracy))

# Adversarial examples generated offline for the training split.
X = data.train_data
X_adv = np.load("../2-AEs/" + dataset + "/train_" + mode + ".npy")
pred_original = model.model.predict(X)
pred_adv = model.model.predict(X_adv)
# BUG FIX: the original compared np.argmax without an axis, which flattens
# the (N, classes) prediction matrices to single scalar indices, so the
# printed "rate" collapsed to exactly 0.0 or 1.0.  A per-sample argmax
# (axis=1), matching the accuracy computations above, gives the true
# fraction of predictions changed by the perturbation.
print("Adversarial Success Rate: " + str(1 - np.mean(
    np.argmax(pred_original, axis=1) == np.argmax(pred_adv, axis=1))))
delta = X - X_adv  # raw perturbations; presumably analyzed below -- confirm
def main(args):
    """Run a zeroth-order (gradient-free, query-based) adversarial attack.

    Supports several ZO optimizers selected by args['mode'] (ZOSGD,
    ZOsignSGD, ZOSCD, ZOAdaMM, ZOSMD, ZOPSGD, ZONES) in either the
    unconstrained (tanh change-of-variable) or box-constrained formulation,
    against MNIST / CIFAR-10 / ImageNet classifiers.  Records the first and
    the least-distorted successful attack and saves the loss/perturbation
    trajectories with np.savez.

    args: dict of experiment options (class_id, target_id, maxiter, kappa,
          q, mode, lr, constraint, ...).
    """
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args['target_id']  ### target images id (adv example) if target attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args['init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]  # NOTE(review): unused in this function

        ## load classifier; for MNIST and CIFAR pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
            #model = InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        # All test images of the requested class.
        #orig_img = np.load('ori_img_backup.npy')
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]
        #np.save('ori_img_backup',orig_img)
        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which are misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        # BUG FIX: the original line was `assert 'no enough valid inputs'`,
        # which asserts a non-empty (always truthy) string and can never
        # fire.  Make the precondition actually fail when there are fewer
        # correctly-classified images than the requested mini-batch size.
        assert orig_img.shape[0] >= arg_bsz, 'no enough valid inputs'

        orig_img = orig_img[:arg_bsz]
        np.save('original_imgsID' + str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label = class_id

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label = target_id
        else:
            target_label = true_label

        #orig_img, target = util.generate_data(data, class_id, target_label)
        # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

        ## parameter
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        # mu=1/d**2  # smoothing parameter
        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image to vec
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## w adv image initialization (change of variable for 'uncons')
        if args["constraint"] == 'uncons':
            # * 0.999999 to avoid +-0.5 return +-infinity
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value, note that orig_img_vec in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        # ## test ##
        # for test_value in w_ori_img_vec[0, :]:
        #     if np.isnan(test_value) or np.isinf(test_value):
        #         print(test_value)

        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7  #0.00001
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))
            m = np.zeros((1, d))
            # momentum parameter for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        #for i in tqdm(range(I)):
        for i in range(I):
            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)
            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img,
                                                 arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            # if np.remainder(i,50)==0:
            #     print("total loss:",total_loss[i])
            #     print(np.linalg.norm(grad_est, np.inf))

            ## ZO-Attack, unconstrained optimization formulation
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
                v_hat = np.maximum(v_hat, v)
                # NOTE(review): divides by sqrt(v), not sqrt(v_hat) -- for
                # AMSGrad-style updates v_hat would be expected; confirm.
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    #X_temp = orig_img_vec.reshape((-1,1))
                    #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                               -0.5, 0.5)
                    #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                # v_init = 1e-2 #0.00001
                # v = v_init * np.ones((1, d))
                # m = np.zeros((1, d))
                # # momentum parameter for first and second order moment
                # beta_1 = 0.9
                # beta_2 = 0.99  # only used by AMSGrad
                # m = beta_1 * m + (1-beta_1) * grad_est
                # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                # if args["constraint"] == 'cons':
                #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                #     X_temp = orig_img_vec.reshape((-1,1))
                #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                # delta_adv = delta_adv - base_lr* grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)

            # if arg_mode == "ZO-AdaFom":
            #     m = beta_1 * m + (1-beta_1) * grad_est
            #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
            #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
            ##

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## convert back to adv_img in [-0.5 , 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
            else:
                adv_img_vec = w_img_vec.copy()

            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            # Periodic checkpoint of the current perturbation.
            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM":
                    print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                # NOTE(review): np.logical_or of an arg_bsz-length array and a
                # scalar yields an array; `if` on it raises ValueError when
                # arg_bsz > 1 -- likely needs .any()/.all(); confirm intent.
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1,
                                     10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            # Success test: targeted needs target logit to dominate by kappa
            # (in log-space); untargeted needs any other class to dominate.
            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) -
                     np.log(other_prob + 1e-10)) >=
                        kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False  ### once gets into this, it will no longer record the next successful attack
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) -
                     np.log(target_prob + 1e-10)) >=
                        kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            # os.system("mkdir Results_SL")
            # ## best attack (final attack)
            # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
            # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, adv_class)
            # suffix3 = "id_{}_Mode_{}".format(class_id, arg_mode)
            # ### save original image
            # util.save_img(orig_img, "Results_SL/id_{}.png".format(class_id))
            # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
            # ### adv. image
            # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
            # ### adv. perturbation
            # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
            #
            #
            # ## first attack
            # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, first_class)
            # ## first adv. imag
            # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
            # ### first adv. perturbation
            # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            ## print
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
            # print(total_loss)
        else:
            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
targets = np.array(targets) labels = np.array(labels) return inputs, targets, labels if __name__ == "__main__": dataset = "mnist" #"cifar" Targeted = True Iterations = 500 with tf.Session() as sess: if dataset == "mnist": data = MNIST() model = MNISTModel("/models/mnist", sess) elif dataset == "cifar": data = CIFAR() model = CIFARModel("models/cifar", sess) else: raise Exception("Invalid dataset!", dataset) # attack = CarliniL0(sess, model,targeted = Targeted,max_iterations=1000) # attack = Leastpixel_attack(sess, model, max_iterations=1000) attack = CarliniL0_batch(sess, model, targeted=Targeted, max_iterations=1000) # attack = LPA_attack(sess, model, max_iterations=2000, targeted=Targeted) # attack = LPA_attack2(sess, model, max_iterations=4000,targeted=Targeted) # attack = LPA_attack_batch(sess, model, max_iterations=2000,targeted=Targeted) if Targeted: length = 10 else: