def main(_):
    """Run an iterated FGSM attack on the model chosen by FLAGS.dataset.

    Builds adversarial versions of the training and test sets, saves them
    under adversarial_outputs/, and prints clean vs. adversarial accuracy.
    """
    with tf.Session() as sess:
        K.set_session(sess)

        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
        elif FLAGS.dataset == 'Cifar':  # BUG FIX: was FLAGS.datset (typo) — branch never ran
            data, model = CIFAR(), CIFARModel("models/cifar", sess)

        def _model_fn(x, logits=False):
            # Adapter so fgsm() can query probabilities (and optionally logits).
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        # NOTE(review): `x` is not defined in this function — presumably a
        # module-level input placeholder; confirm its shape matches the
        # selected dataset.
        if FLAGS.dataset == 'MNIST':
            x_adv = fgsm(_model_fn, x, epochs=9, eps=0.02)
        elif FLAGS.dataset == 'Cifar':  # BUG FIX: was FLAGS.datset (typo)
            x_adv = fgsm(_model_fn, x, epochs=4, eps=0.01)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        np.save('adversarial_outputs/fgsm_train_' + FLAGS.dataset.lower() + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + FLAGS.dataset.lower() + '.npy', X_adv_test)

        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)

        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
def main():
    """Train the pure-ResNet CIFAR model on the "GBP_0" data variant.

    The SGD optimizer's lr of 0.00 is a placeholder — the actual learning
    rate is supplied each epoch by the LearningRateScheduler callback.
    Weights are written to Models/<tag>.
    """
    tag = "GBP_0"
    n_per_batch = 128

    net = CIFARModel().model  # pure resnet
    dataset = CIFAR(tag)

    optimizer = SGD(lr=0.00, momentum=0.9, nesterov=False)
    lr_callback = LearningRateScheduler(get_lr)
    net.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])

    # Light augmentation: small rotations/shifts plus horizontal flips.
    augmenter = ImageDataGenerator(rotation_range=10,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   horizontal_flip=True)
    augmenter.fit(dataset.train_data)

    net.fit_generator(
        augmenter.flow(dataset.train_data, dataset.train_labels,
                       batch_size=n_per_batch),
        steps_per_epoch=dataset.train_data.shape[0] // n_per_batch,
        epochs=300,
        verbose=1,
        validation_data=(dataset.test_data, dataset.test_labels),
        callbacks=[lr_callback])

    net.save_weights('Models/{}'.format(tag))
def load_model_and_dataset(dataset):
    """Return (model, data) for the named dataset.

    The NiN variants restore checkpointed weights into the global `sess`
    from a hard-coded checkpoint directory.
    """
    checkpoint_root = ('/home/bull/home/zmn/insight/'
                       'sparse-imperceivable-attacks-master/models/')

    if dataset == 'mnist':
        import mnist_NiN_bn
        model = mnist_NiN_bn.NiN_Model()
        # Saver is created after the model so its variables exist.
        tf.train.Saver().restore(
            sess, tf.train.latest_checkpoint(checkpoint_root + 'mnist_NiN/'))
        data = MNIST()
    elif dataset == 'mnist2':
        import mnist_model
        model = mnist_model.MNISTModel()
        data = MNIST()
    elif dataset == 'cifar10':
        import cifar_NiN_bn
        model = cifar_NiN_bn.NiN_Model()
        tf.train.Saver().restore(
            sess, tf.train.latest_checkpoint(checkpoint_root + 'cifar_NiN/'))
        data = CIFAR()
    else:
        raise ValueError('unknown dataset')

    return model, data
def ATTACK(attack,dataset,first_index,settype, last_index, batch_size):
    """
    Applies a saliency-map style attack against the model for *dataset*.

    Parameters
    ----------
    attack: str
        The type of attack used (either "jsma", "wjsma" or "tjsma").
    dataset: str
        Dataset name; 'mnist' uses the cleverhans MNIST set, any other value
        falls through to the CIFAR test set.
    first_index: int
        The index of the first image attacked.
    settype: str
        The type of set used (either "train" or "test"); only honoured for
        'mnist' — the CIFAR branch always uses the test set.
    last_index: int
        The index of the last image attacked.
    batch_size: int
        The size of the image batches.
    """
    if dataset == 'mnist':
        from cleverhans.dataset import MNIST
        x_set, y_set = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000).get_set(settype)
        print(x_set.shape)
        gamma = 0.155
        file_path="/models/mnist"
    #elif model in CIFAR10_SETS:
    else:
        #from cleverhans.dataset import CIFAR10
        #x_set, y_set = CIFAR10(train_start=0, train_end=50000, test_start=0, test_end=10000).get_set(settype)
        #gamma = 0.155
        from setup_cifar import CIFAR
        data = CIFAR()
        # NOTE: settype is ignored on this path; the CIFAR test set is always used.
        x_set,y_set = data.test_data,data.test_labels
        print(x_set.shape)
        print(y_set)
        gamma = 0.155
        file_path="./Least_pixel_attack/models/cifar"
    #else:
    #    raise ValueError("Invalid model: " + model)

    generate_attacks(
        save_path="./Least_pixel_attack/models/data",
        file_path=file_path,
        dataset = dataset,
        x_set=x_set,
        y_set=y_set,
        attack=attack,
        gamma=gamma,
        first_index=first_index,
        last_index=last_index,
        batch_size=batch_size
    )
def main(args):
    """Train the MNIST and/or CIFAR classifier selected by args['dataset']."""
    if not os.path.isdir('models'):
        os.makedirs('models')

    chosen = args['dataset']
    if chosen in ("mnist", "all"):
        train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50)
    if chosen in ("cifar", "all"):
        train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50)
def main(args):
    """Train an autoencoder (CODEC) on the dataset named in args.

    args keys used: dataset, train_data_source, compress_mode, batch_size,
    epochs, save_prefix, imagenet_train_dir, imagenet_validation_dir.
    """
    # load data
    print("Loading data", args["dataset"])
    if args["dataset"] == "mnist":
        data = MNIST()
        if args["train_data_source"]:
            print("Using data from {}".format(args["train_data_source"]))
            img = np.load("{}_data.npy".format(args["train_data_source"]))
            # BUG FIX: labels were loaded from "{}_data.npy" (copy-paste of the
            # line above), putting images into validation_labels.
            # TODO(review): confirm the label files are named "<prefix>_labels.npy".
            labels = np.load("{}_labels.npy".format(args["train_data_source"]))
            data.validation_data = img
            data.validation_labels = labels
    elif args["dataset"] == "cifar10":
        data = CIFAR()
    elif args["dataset"] == "fe":
        data = FACIAL()
    elif args["dataset"] == "imagenet":
        # use ImageDataGenerate provided by Keras
        data = ImageNetDataGen(args["imagenet_train_dir"],
                               args["imagenet_validation_dir"],
                               data_augmentation=False)
    print("Done...")

    if args["dataset"] == "imagenet":
        # ImageNet streams from disk, so the input shape is fixed up front.
        data_shape = (None, 299, 299, 3)
        resize = 256
    else:
        data_shape = data.train_data.shape
        resize = None

    print("Start training autoencoder")
    codec = CODEC(img_size=data_shape[1], num_channels=data_shape[3],
                  compress_mode=args["compress_mode"], resize=resize)
    train_autoencoder(data, codec, batch_size=args["batch_size"],
                      epochs=args["epochs"],
                      saveFilePrefix=args["save_prefix"],
                      train_imagenet=(args["dataset"] == "imagenet"))
def cw_attack(file_name, norm, sess, num_image=10, cifar = False, tinyimagenet = False):
    """Run a Carlini-Wagner-family attack and report average distortion.

    norm selects the attack and the distortion measure:
      '1' -> EADL1  (sum of absolute values),
      '2' -> CarliniL2 (sum of squares — note: squared L2, no sqrt),
      'i' -> CarliniLi (max absolute value).

    Returns (average distortion of successful perturbations,
             average wall-clock time per input).
    """
    # Fixed seeds for reproducible target/image selection.
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    if norm == '1':
        attack = EADL1
        norm_fn = lambda x: np.sum(np.abs(x),axis=(1,2,3))
    elif norm == '2':
        attack = CarliniL2
        norm_fn = lambda x: np.sum(x**2,axis=(1,2,3))
    elif norm == 'i':
        attack = CarliniLi
        norm_fn = lambda x: np.max(np.abs(x),axis=(1,2,3))
    if cifar:
        data = CIFAR()
    elif tinyimagenet:
        data = tinyImagenet()
    else:
        data = MNIST()
    model = load_model(file_name, custom_objects={'fn':loss,'tf':tf, 'ResidualStart' : ResidualStart, 'ResidualStart2' : ResidualStart2})
    # generate_data must run BEFORE model.predict is clobbered below — it
    # uses the real Keras predict method to pick correctly classified images.
    inputs, targets, true_labels, true_ids, img_info = generate_data(data, samples=num_image, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.predict, start=0)
    # The attack classes expect a callable `predict` plus image metadata;
    # a Keras model is itself callable, so it is bound in place of predict.
    model.predict = model
    model.num_labels = 10
    if cifar:
        model.image_size = 32
        model.num_channels = 3
    elif tinyimagenet:
        model.image_size = 64
        model.num_channels = 3
        model.num_labels = 200
    else:
        model.image_size = 28
        model.num_channels = 1

    # `timer` is presumably the time module imported under this alias — confirm.
    start_time = timer.time()
    attack = attack(sess, model, max_iterations = 1000)
    perturbed_input = attack.attack(inputs, targets)
    UB = np.average(norm_fn(perturbed_input-inputs))
    return UB, (timer.time()-start_time)/len(inputs)
def main():
    """Print train/test accuracy of a randomly initialised ResNet on each CIFAR variant."""
    variants = ["GBP_0", "GBP_1", "GBP_2", "GBP_3", "GBP_4", "ORI"]
    net = prepare_resnet(load_weights='random', sess=None, num_classes=100)

    def _accuracy(images, one_hot_labels):
        # Fraction of samples whose arg-max prediction matches the label.
        predicted = np.argmax(net.predict(images), axis=1)
        return np.mean(predicted == np.argmax(one_hot_labels, axis=1))

    for variant in variants:
        dataset = CIFAR(variant)
        print('Accuracy on {} - Training : '.format(variant),
              _accuracy(dataset.train_data, dataset.train_labels))
        print('Accuracy on {} - Testing : '.format(variant),
              _accuracy(dataset.test_data, dataset.test_labels))
def convert(file_name, new_name, cifar=False):
    """Convert a saved CNN into an equivalent MLP and save it as *new_name*.

    get_weights() returns per-layer parameter lists (presumably equivalent
    dense weights for the convolutional model — confirm against its source);
    these are copied into a Flatten -> [Dense+ReLU]* -> Dense(10) network.

    Returns the converted model's validation accuracy.
    """
    if not cifar:
        eq_weights, new_params = get_weights(file_name)
        data = MNIST()
    else:
        eq_weights, new_params = get_weights(file_name, inp_shape=(32, 32, 3))
        data = CIFAR()

    model = Sequential()
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    for param in new_params:
        model.add(Dense(param))
        model.add(Lambda(lambda x: tf.nn.relu(x)))
    model.add(Dense(10))

    for i in range(len(eq_weights)):
        # Best-effort debug print; some entries may have no shaped weights.
        try:
            print(eq_weights[i][0].shape)
        except Exception:  # FIX: narrowed from bare `except:` (which also swallowed KeyboardInterrupt/SystemExit)
            pass
        model.layers[i].set_weights(eq_weights[i])

    sgd = SGD(lr=0.01, decay=1e-5, momentum=0.9, nesterov=True)
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])
    model.save(new_name)
    acc = model.evaluate(data.validation_data, data.validation_labels)[1]

    printlog("Converting CNN to MLP")
    # File names encode the architecture: ..._<nlayer>layer_<filters>_<kernel>.
    name_parts = file_name.split('_')  # split once instead of three times
    nlayer = name_parts[-3][0]
    filters = name_parts[-2]
    kernel_size = name_parts[-1]
    printlog(
        "model name = {0}, numlayer = {1}, filters = {2}, kernel size = {3}".
        format(file_name, nlayer, filters, kernel_size))
    printlog("Model accuracy: {:.3f}".format(acc))
    printlog("-----------------------------------")
    return acc
targets.append(np.eye(data.test_labels.shape[1])[j]) else: inputs.append(data.test_data[start + i]) targets.append(data.test_labels[start + i]) inputs = np.array(inputs) targets = np.array(targets) return inputs, targets if __name__ == "__main__": with tf.Session() as sess: data = CIFAR("ORI") Model = CIFARModel(restore="Models/CIFAR10_End2End_Trainable", end2end=True) attack = CarliniL2(sess, Model, batch_size=9, max_iterations=1000, confidence=0) inputs, targets = generate_data(data, samples=1, targeted=True, start=0, inception=False)
r = np.random.random_integers(0,9) l[i,r] = 1 return l def attack(data, name): sess = K.get_session() model = load_model("models/"+name, custom_objects={'fn': fn}) class Wrap: image_size = 28 if "mnist" in name else 32 num_labels = 10 num_channels = 1 if "mnist" in name else 3 def predict(self, x): return model(x) attack = CarliniL2(sess, Wrap(), batch_size=100, max_iterations=10000, binary_search_steps=5, initial_const=1, targeted=True) adv = attack.attack(data.test_data[:100], get_labs(data.test_labels[:100])) np.save("/tmp/"+name, adv) print(np.mean(np.sum((adv-data.test_data[:100])**2,axis=(1,2,3))**.5)) attack(MNIST(), "mnist") attack(MNIST(), "mnist_brelu") attack(MNIST(), "mnist_gaussian") attack(MNIST(), "mnist_gaussian_brelu") attack(CIFAR(), "cifar") attack(CIFAR(), "cifar_brelu") attack(CIFAR(), "cifar_gaussian") attack(CIFAR(), "cifar_gaussian_brelu")
def run(args, restrict=True):
    """Attack a detector-defended model with SPSA and log the accuracies.

    args is a positional tuple/list: (dataset, epsilon, mode, K).
    Writes "<dataset>/<epsilon>_<mode>_<K>.txt" containing natural and
    adversarial accuracy over N randomly sampled test images.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # NOTE(review): `id` shadows the builtin; harmless here but worth renaming.
        id = np.int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(id - 1)

    # Load parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix random seeds
    np.random.seed(1)
    tf.set_random_seed(1)  # Having this before Keras.clear_session() causes it to hang for some reason

    # Load model/data and set up SPSA placeholders (SPSA attacks one image at
    # a time, hence the batch-of-one placeholder shape).
    N = 50
    if dataset == "MNIST":
        # Base model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set; labels are -1 for real, +1 for adversarial.
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode + ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    #print("Bounds ", np.max(np.abs(x_train)))
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack on N random test images and log both accuracies.
    f = open(fname + ".txt", "w")
    sample = np.random.choice(data.test_data.shape[0], N, replace=False)
    x_sample = data.test_data[sample]
    y_sample = np.argmax(data.test_labels[sample], axis=1)

    logits_nat = sess.run(defended_logits, {x: x_sample})
    f.write("Accuracy on Natural Images: " +
            str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")

    pred_adv = -1.0 * np.ones((N))
    for i in range(N):
        # SPSA attacks a single image per run.
        x_real = x_sample[i].reshape(shape_spsa)
        x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
        pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))
    f.write("Accuracy on Adversarial Images: " +
            str(np.mean(pred_adv == y_sample)))
    f.close()
def main(args):
    """Run a white-box (Carlini L2) or black-box (ZOO-style) attack.

    Iterates over generated (image, target) pairs, attacks each image,
    re-classifies the attacked image through a temporal encoder, saves
    original/adversarial/diff PNGs, and prints per-image and aggregate
    statistics. Success requires the intended (mis)classification AND an
    L2 distortion of at most 20.0.
    """
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model = MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            #data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')
        if args['numimg'] == 0:
            # 0 means "use every remaining test image".
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess, model, batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0, learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess, model, batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0, learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data, samples=args['numimg'], targeted=not args['untargeted'],
            start=args['firstimg'], inception=is_inception)
        print('Done...')
        #print('all_inputs : ', all_inputs.shape)
        #print('encoding_all : ', encoding_all.shape)
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))
        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            #print('encoding_inputs shape: ', encoding_inputs)
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):", original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue
            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            # Attack the raw (unencoded) input towards the chosen target.
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5
            ##### llj: re-encode the adversarial image channel-by-channel
            ##### through the temporal encoder before re-classifying.
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            channel0, channel1, channel2 = encode_adv[:, 0, :, :], encode_adv[:, 1, :, :], encode_adv[:, 2, :, :]
            channel0, channel1, channel2 = temp_encoder.tempencoding(
                channel0), temp_encoder.tempencoding(
                    channel1), temp_encoder.tempencoding(channel2)
            encode_adv = np.concatenate([channel0, channel1, channel2], axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))
            #### llj
            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])
            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            # A large-distortion "win" is not counted as a success.
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1],
                adversarial_class[-1], success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs, "{}/{}/{}_original_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            show(
                adv, "{}/{}/{}_adversarial_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            show(
                adv - inputs, "{}/{}/{}_diff_{}.png".format(
                    args['save'], args['dataset'], img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        print(' origin accuracy : ', 100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
def run(file_name, n_samples, p_n, q_n, activation = 'relu', cifar=False, tinyimagenet=False):
    """Certify robustness of a saved CNN with CNN-Cert via binary search.

    For each generated (image, target) pair, binary-searches (in log space,
    15 steps) for the largest eps such that the certified lower bound on
    f_true - f_target stays positive, under the p_n norm (105 denotes inf).

    Returns (average certified eps, average runtime per image).
    NOTE(review): q_n is unused in this body — confirm whether it is needed.
    """
    # Fixed seeds for reproducible image/target selection.
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    keras_model = load_model(file_name, custom_objects={'fn':fn, 'tf':tf})
    if tinyimagenet:
        model = CNNModel(keras_model, inp_shape = (64,64,3))
    elif cifar:
        model = CNNModel(keras_model, inp_shape = (32,32,3))
    else:
        model = CNNModel(keras_model)

    # Set correct linear_bounds function (module-level global used by the
    # numba-compiled bound routines), then force recompilation against it.
    global linear_bounds
    if activation == 'relu':
        linear_bounds = relu_linear_bounds
    elif activation == 'ada':
        linear_bounds = ada_linear_bounds
    elif activation == 'sigmoid':
        linear_bounds = sigmoid_linear_bounds
    elif activation == 'tanh':
        linear_bounds = tanh_linear_bounds
    elif activation == 'arctan':
        linear_bounds = atan_linear_bounds
    upper_bound_conv.recompile()
    lower_bound_conv.recompile()
    compute_bounds.recompile()

    if cifar:
        inputs, targets, true_labels, true_ids, img_info = generate_data(CIFAR(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    elif tinyimagenet:
        inputs, targets, true_labels, true_ids, img_info = generate_data(tinyImagenet(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    else:
        inputs, targets, true_labels, true_ids, img_info = generate_data(MNIST(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    # target_type bit flags:
    #0b01111 <- all
    #0b0010 <- random
    #0b0001 <- top2
    #0b0100 <- least

    steps = 15       # binary-search iterations
    eps_0 = 0.05     # initial eps guess
    summation = 0

    # Warm up the JIT-compiled bound computation before timing.
    warmup(model, inputs[0].astype(np.float32), eps_0, p_n, find_output_bounds)

    start_time = time.time()
    for i in range(len(inputs)):
        print('--- CNN-Cert: Computing eps for input image ' + str(i) + '---')
        predict_label = np.argmax(true_labels[i])
        target_label = np.argmax(targets[i])
        weights = model.weights[:-1]
        biases = model.biases[:-1]
        shapes = model.shapes[:-1]
        # Fold the margin f_c - f_t into a final 1-output layer.
        W, b, s = model.weights[-1], model.biases[-1], model.shapes[-1]
        last_weight = (W[predict_label,:,:,:]-W[target_label,:,:,:]).reshape([1]+list(W.shape[1:]))
        weights.append(last_weight)
        biases.append(np.asarray([b[predict_label]-b[target_label]]))
        shapes.append((1,1,1))

        # Perform binary search on log(eps): grow while the lower bound on
        # the margin is certified positive, shrink otherwise.
        log_eps = np.log(eps_0)
        log_eps_min = -np.inf
        log_eps_max = np.inf
        for j in range(steps):
            LB, UB = find_output_bounds(weights, biases, shapes, model.pads, model.strides, inputs[i].astype(np.float32), np.exp(log_eps), p_n)
            print("Step {}, eps = {:.5f}, {:.6s} <= f_c - f_t <= {:.6s}".format(j,np.exp(log_eps),str(np.squeeze(LB)),str(np.squeeze(UB))))
            if LB > 0: #Increase eps
                log_eps_min = log_eps
                log_eps = np.minimum(log_eps+1, (log_eps_max+log_eps_min)/2)
            else: #Decrease eps
                log_eps_max = log_eps
                log_eps = np.maximum(log_eps-1, (log_eps_max+log_eps_min)/2)

        if p_n == 105:
            str_p_n = 'i'
        else:
            str_p_n = str(p_n)

        print("[L1] method = CNN-Cert-{}, model = {}, image no = {}, true_id = {}, target_label = {}, true_label = {}, norm = {}, robustness = {:.5f}".format(activation,file_name, i, true_ids[i],target_label,predict_label,str_p_n,np.exp(log_eps_min)))
        summation += np.exp(log_eps_min)
    K.clear_session()

    eps_avg = summation/len(inputs)
    total_time = (time.time()-start_time)/len(inputs)
    print("[L0] method = CNN-Cert-{}, model = {}, total images = {}, norm = {}, avg robustness = {:.5f}, avg runtime = {:.2f}".format(activation,file_name,len(inputs),str_p_n,eps_avg,total_time))
    return eps_avg, total_time
def main(args):
    """Run a zero-order (black-box) adversarial attack on one input class.

    Supports several ZO optimizers (ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM, ZOSMD,
    ZOPSGD, ZONES) under a constrained ('cons', box projection) or
    unconstrained ('uncons', tanh reparameterization) formulation.
    Pixel values are assumed in [-0.5, 0.5]. Results and per-iteration
    losses are saved with np.savez at the end.
    """
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args['target_id']  ### target image id (adv example) if targeted attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args['init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]  # NOTE(review): unused in this function

        ## load classifier; for MNIST and CIFAR pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        # All test images of the chosen class.
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]

        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which are misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        # BUG FIX: was `assert 'no enough valid inputs'`, an assert on a
        # non-empty string literal which is always true and never fired.
        if orig_img.shape[0] < arg_bsz:
            raise ValueError('not enough valid inputs')

        orig_img = orig_img[:arg_bsz]
        np.save('original_imgsID' + str(class_id), orig_img)
        true_label = class_id

        if arg_targeted_attack:  ### targeted attack
            target_label = target_id
        else:
            target_label = true_label

        ## problem dimension (flattened image size)
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image batch to vectors
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## adv image initialization in the optimization variable w
        if args["constraint"] == 'uncons':
            # * 0.999999 to avoid +-0.5 returning +-infinity under arctanh
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value; orig_img_vec is in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))
            m = np.zeros((1, d))
            # momentum parameters for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        for i in range(I):
            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)
            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img, arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            ## ZO-Attack update step (per optimizer)
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                v_hat = np.maximum(v_hat, v)
                # NOTE(review): update divides by sqrt(v), not sqrt(v_hat) —
                # plain AMSGrad would use v_hat; confirm this is intended.
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## convert back to adv_img in [-0.5, 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
            else:
                adv_img_vec = w_img_vec.copy()
            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM":
                    print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) -
                     np.log(other_prob + 1e-10)) >=
                        kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## record first successful attack only once
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) -
                     np.log(target_prob + 1e-10)) >=
                        kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## record first successful attack only once
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            ## save data (includes first/best attack statistics)
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
        else:
            ## save data (no successful attack)
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
## ## This program is licenced under the BSD 2-Clause licence, ## contained in the LICENCE file in this directory. from setup_cifar import CIFAR, CIFARModel from setup_mnist import MNIST, MNISTModel from setup_inception import ImageNet, InceptionModel import tensorflow as tf import numpy as np BATCH_SIZE = 1 with tf.Session() as sess: data, model = MNIST(), MNISTModel("models/mnist", sess) data, model = CIFAR(), CIFARModel("models/cifar", sess) data, model = ImageNet(), InceptionModel(sess) x = tf.placeholder( tf.float32, (None, model.image_size, model.image_size, model.num_channels)) y = model.predict(x) r = [] for i in range(0, len(data.test_data), BATCH_SIZE): pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]}) #print(pred) #print('real',data.test_labels[i],'pred',np.argmax(pred)) r.append( np.argmax(pred, 1) == np.argmax(data.test_labels[i:i + BATCH_SIZE], 1))
base_lr = 0.1 datagen.fit(data.train_data) model.fit_generator(datagen.flow(data.train_data, data.train_labels, batch_size=batch_size), steps_per_epoch=data.train_data.shape[0] // batch_size, epochs=num_epochs, verbose=1, validation_data=(data.validation_data, data.validation_labels), callbacks=[schedule]) print( 'Test accuracy:', np.mean( np.argmax(model.predict(data.test_data), axis=1) == np.argmax( data.test_labels, axis=1))) if file_name != None: model.save_weights(file_name) return model if dataset == "MNIST": train(MNIST(), MNISTModel, "MNIST", num_epochs=30) elif dataset == "CIFAR": train(CIFAR(), CIFARModel, "CIFAR", num_epochs=300)
def run(args, restrict=True):
    """Evaluate the detector-defended model against a targeted SPSA attack.

    Parameters
    ----------
    args : sequence
        ``[dataset, epsilon, mode, K, bias]`` as strings:
        dataset ("MNIST" or "CIFAR"), attack budget epsilon, representation
        mode, number of neighbours K, and detector bias.
    restrict : bool
        When True, pin this worker subprocess to a single GPU derived from
        the multiprocessing worker index.

    Side effects: periodically pickles a result dict (false positives of the
    defense, true labels, undefended predictions, per-class adversarial
    predictions) to a file named after the configuration, then runs
    ``analysis`` on the final file.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # np.int was deprecated in NumPy 1.20 and removed in 1.24; the
        # builtin int() is equivalent here.  Also avoid shadowing builtin id.
        worker_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id - 1)

    # Load parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    bias = float(args[4])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(
        K) + "_" + str(bias)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1
    )  # Having this before Keras.clear_session() causes it to hang for some reason

    # Load Model/Data and setup SPSA placeholders
    N = 1000
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA attacks a single image at a time
        shape_spsa = (1, 28, 28, 1)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        shape_spsa = (1, 32, 32, 3)

    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    #print("Bounds ", np.max(np.abs(x_train)))
    # Label real examples -1 and adversarial examples +1 for the detector.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K, bias=bias)
    defended_logits = model_defended.get_logits(x)

    # Get the predictions on the original images
    labels = np.argmax(data.test_labels[:N], axis=1)
    logits_real = sess.run(defended_logits, {x: data.test_data[:N]})
    fp = (np.argmax(logits_real, axis=1) == 10)  # false positives of the defense
    pred_undefended = np.argmax(np.delete(logits_real, -1, axis=1),
                                axis=1)  # original model prediction

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y_target=y_spsa,
                              epsilon=epsilon,
                              is_targeted=True,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    pred_adv = -1.0 * np.ones((N, 10))

    def _checkpoint():
        # Persist current progress so partial results survive interruption.
        out = {
            "FP": fp,
            "Labels": labels,
            "UndefendedPrediction": pred_undefended,
            "AdversarialPredictions": pred_adv,
        }
        # Context manager guarantees the file handle is closed even on error.
        with open(fname, "wb") as f:
            pickle.dump(out, f)

    # Run the attack
    for i in range(N):
        if i % 10 == 0:
            print(fname, " ", i)
            _checkpoint()

        x_real = data.test_data[i].reshape(shape_spsa)

        # Try a targeted attack for each class other than the original
        # network prediction and the adversarial class
        for y in range(10):
            if y != pred_undefended[i]:
                x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y})
                pred_adv[i, y] = np.argmax(
                    sess.run(defended_logits, {x: x_adv}))

    _checkpoint()
    analysis(fname)
# run training with given dataset, and print progress model.fit(data.train_data, data.train_labels, batch_size=batch_size, validation_data=(data.validation_data, data.validation_labels), epochs=num_epochs, shuffle=True) # save model to a file if file_name != None: model.save(file_name) return model if not os.path.isdir('models'): os.makedirs('models') if __name__ == "__main__": import argparse ap = argparse.ArgumentParser() ap.add_argument('-d', '--dataset', type=str, default="mnist") args = vars(ap.parse_args()) if "mnist" in args["dataset"]: MNIST() if "cifar" in args["dataset"]: CIFAR() #train(MNIST(), file_name="models/mnist_2layer", params=[1024], num_epochs=1, lr=0.1, decay=1e-3) #train(CIFAR(), file_name="models/cifar_2layer", params=[1024], num_epochs=1, lr=0.2, decay=1e-3)
# train the student model at temperature t student = train(data, file_name, params, num_epochs, batch_size, train_temp, init=file_name + "_init") # and finally we predict at temperature 1 predicted = student.predict(data.train_data) print(predicted) if not os.path.isdir('models'): os.makedirs('models') train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=2) train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=2) train_distillation(MNIST(), "models/mnist-distilled-100", [32, 32, 64, 64, 200, 200], num_epochs=2, train_temp=100) train_distillation(CIFAR(), "models/cifar-distilled-100", [64, 64, 128, 128, 256, 256], num_epochs=2, train_temp=100)
def main(args):
    """Run a zeroth-order (black-box) adversarial attack over a set of test
    images and save per-image attack statistics under ``retperimage2/``.

    ``args`` is a dict of attack hyper-parameters (dataset, mode, maxiter,
    lr, kappa, constraint, ...).  For each image whose prediction matches its
    true label, the selected ZO optimizer (ZOSGD / ZOsignSGD / ZOSCD /
    ZOAdaMM / ZOSMD / ZOPSGD / ZONES) iteratively updates an adversarial
    perturbation, recording both the first successful attack and the
    least-distorted one.  Aggregate success statistics are printed at the end.
    """
    with tf.Session() as sess:
        # Fix all RNGs for reproducibility of the sampled image ids.
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)
        # Draw 3x as many candidate ids as needed; misclassified images are
        # skipped below, so extras act as replacements.
        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)
        #image_id_set = np.random.randint(1, 1000, args["image_number"] )
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        # ii walks the candidate id list; iii counts images actually attacked.
        succ_count, ii, iii = 0, 0, 0
        final_distortion_count, first_iteration_count, first_distortion_count = [], [], []
        while iii < args["image_number"]:
            ii = ii + 1
            image_id = image_id_set[ii]

            # if image_id!= 836: continue # for test only

            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))
            ## orig_class: predicted label;

            if arg_targeted_attack:  ### target attack: next class (mod 10)
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            # Only attack images the model already classifies correctly.
            if true_label != orig_class:
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            else:
                iii = iii + 1

            print('\n', iii, '/', args["image_number"])

            ## parameter
            d = orig_img.size  # feature dim
            print("dimension = ", d)

            # mu=1/d**2  # smoothing parameter
            q = arg_q + 0
            I = arg_max_iter + 0
            kappa = arg_kappa + 0
            const = arg_init_const + 0

            ## flatten image to vec
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  ### initialized adv. perturbation
            #delta_adv = np.random.uniform(-16/255,16/255,(1,d))

            ## w adv image initialization (tanh-space change of variables for
            ## the unconstrained formulation)
            if args["constraint"] == 'uncons':
                # * 0.999999 to avoid +-0.5 return +-infinity
                w_ori_img_vec = np.arctanh(
                    2 * (orig_img_vec) * 0.999999
                )  # in real value, note that orig_img_vec in [-0.5, 0.5]
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

            # ## test ##
            # for test_value in w_ori_img_vec[0, :]:
            #     if np.isnan(test_value) or np.isinf(test_value):
            #         print(test_value)

            # initialize the best solution & best loss
            best_adv_img = []  # successful adv image in [-0.5, 0.5]
            best_delta = []  # best perturbation
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(I)  ## I: max iters
            l2s_loss_all = np.zeros(I)
            attack_flag = False
            first_flag = True  ## record first successful attack

            # parameter setting for ZO gradient estimation
            mu = args["mu"]  ### smoothing parameter

            ## learning rate
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                ## parameter initialization for AdaMM
                v_init = 1e-7  #0.00001
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))

                m = np.zeros((1, d))
                # momentum parameter for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            #for i in tqdm(range(I)):
            for i in range(I):
                if args["decay_lr"]:
                    # 1/sqrt(t) learning-rate decay
                    base_lr = args["lr"] / np.sqrt(i + 1)

                ## Total loss evaluation
                if args["constraint"] == 'uncons':
                    total_loss[i], l2s_loss_all[
                        i] = function_evaluation_uncons(
                            w_img_vec, kappa, target_label, const, model,
                            orig_img, arg_targeted_attack)
                else:
                    total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                        w_img_vec, kappa, target_label, const, model,
                        orig_img, arg_targeted_attack)

                ## gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    grad_est = grad_coord_estimation(mu, q, w_img_vec, d,
                                                     kappa, target_label,
                                                     const, model, orig_img,
                                                     arg_targeted_attack,
                                                     args["constraint"])
                elif arg_mode == "ZONES":
                    grad_est = gradient_estimation_NES(mu, q, w_img_vec, d,
                                                       kappa, target_label,
                                                       const, model, orig_img,
                                                       arg_targeted_attack,
                                                       args["constraint"])
                else:
                    grad_est = gradient_estimation_v2(mu, q, w_img_vec, d,
                                                      kappa, target_label,
                                                      const, model, orig_img,
                                                      arg_targeted_attack,
                                                      args["constraint"])

                # if np.remainder(i,50)==0:
                # print("total loss:",total_loss[i])
                # print(np.linalg.norm(grad_est, np.inf))

                ## ZO-Attack, unconstrained optimization formulation
                if arg_mode == "ZOSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if arg_mode == "ZOSCD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOAdaMM":
                    # AMSGrad-style update: first moment m, second moment v,
                    # and the running max v_hat.
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                    v_hat = np.maximum(v_hat, v)
                    #print(np.mean(v_hat))
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        tmp = delta_adv.copy()
                        #X_temp = orig_img_vec.reshape((-1,1))
                        #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                        #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                    # v_init = 1e-2 #0.00001
                    # v = v_init * np.ones((1, d))
                    # m = np.zeros((1, d))
                    # # momentum parameter for first and second order moment
                    # beta_1 = 0.9
                    # beta_2 = 0.99 # only used by AMSGrad
                    # m = beta_1 * m + (1-beta_1) * grad_est
                    # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                    # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                    # if args["constraint"] == 'cons':
                    #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                    #     X_temp = orig_img_vec.reshape((-1,1))
                    #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    # delta_adv = delta_adv - base_lr* grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZONES":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)

                # if arg_mode == "ZO-AdaFom":
                #     m = beta_1 * m + (1-beta_1) * grad_est
                #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
                #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
                ##

                ### adv. example update
                w_img_vec = w_ori_img_vec + delta_adv

                ## convert back to adv_img in [-0.5 , 0.5]
                if args["constraint"] == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
                else:
                    adv_img_vec = w_img_vec.copy()

                adv_img = np.resize(adv_img_vec, orig_img.shape)

                ## update the best solution in the iterations
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                # Zero out the target class to find the strongest competitor.
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))

                if arg_save_iteration:
                    os.system("mkdir Examples")
                    if (np.logical_or(
                            true_label != np.argmax(attack_prob),
                            np.remainder(i + 1,
                                         10) == 0)):  ## every 10 iterations
                        suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                            image_id, arg_mode, true_label,
                            np.argmax(attack_prob), i + 1)
                        # util.save_img(adv_img, "Examples/{}.png".format(suffix))

                if arg_targeted_attack:
                    # Success when the target-class margin exceeds kappa.
                    if (np.log(target_prob + 1e-10) -
                            np.log(other_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False  ### once gets into this, it will no longer record the next successful attack
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
                else:
                    # Untargeted: success when any other class beats the
                    # original class by at least kappa.
                    if (np.log(other_prob + 1e-10) -
                            np.log(target_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1

            if (attack_flag):
                # os.system("mkdir Results_SL")
                # ## best attack (final attack)
                # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
                # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, adv_class)
                # suffix3 = "id_{}_Mode_{}".format(image_id, arg_mode)
                # ### save original image
                # util.save_img(orig_img, "Results_SL/id_{}.png".format(image_id))
                # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
                # ### adv. image
                # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
                # ### adv. perturbation
                # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
                #
                # ## first attack
                # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, first_class)
                # ## first adv. imag
                # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
                # ### first adv. perturbation
                # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

                ## save data
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         best_iteation=best_iteration,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                ## print
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
                # print(total_loss)
            else:
                ## save data
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")

            sys.stdout.flush()

        # Aggregate statistics over all attacked images.
        print('succ rate:', succ_count / args["image_number"])
        print('average first success l2', np.mean(first_distortion_count))
        print('average first itrs', np.mean(first_iteration_count))
        print('average l2:', np.mean(final_distortion_count), ' best l2:',
              np.min(final_distortion_count), ' worst l2:',
              np.max(final_distortion_count))
def main(args):
    """Run a black-box ZOO / AutoZOOM attack over a generated attack set.

    ``args`` is a dict of options: dataset (mnist/cifar10/imagenet),
    attack_method (zoo, zoo_ae, autozoom_bilin, autozoom_ae), attack_type
    (targeted or untargeted), num_img, batch_size, codec settings, etc.
    For each selected image the attack is run, success and L2 distortion are
    reported, and original/adversarial/diff images are saved as .png/.npy
    under ``save_path/dataset/attack_method/attack_type``.
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(data_path=args["imagenet_dir"], targetFile=args["attack_single_img"]), InceptionModel(sess, use_softmax=True)
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        # elif args['dataset'] == "imagenet_np":

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        if args["attack_single_img"]:
            # manually setup attack set
            # attacking only one image with random attack]
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])

            if args["attack_type"] == "targeted":
                # one-hot vector for the requested target class
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels
        else:
            # generate attack set
            if args["dataset"] == "imagenet" or args[
                    "dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False

            if args["random_target"] and (args["dataset"] == "imagenet" or
                                          args["dataset"] == "imagenet_np"):
                # find all possible class
                all_class = np.unique(np.argmax(data.test_labels, 1))
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    random_target_class=all_class,
                    shift_index=shift_index)
            elif args["random_target"]:
                # random target on all possible classes
                class_num = data.test_labels.shape[1]
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    random_target_class=list(range(class_num)),
                    shift_index=shift_index)
            else:
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    shift_index=shift_index)

        # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(i, tar_lab, orig_lab, orig_img_id[i]))

        # attack related settings
        if args["attack_method"] == "zoo" or args[
                "attack_method"] == "autozoom_bilin":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))
        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
            args["batch_size"] = 1  # force to be 1

        if args["attack_method"] == "zoo_ae" or args[
                "attack_method"] == "autozoom_ae":
            #_, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            # The attack search space is the decoder's (smaller) input shape.
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])
        os.system("mkdir -p {}".format(save_prefix))

        total_success = 0
        l2_total = 0
        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]
            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))

            if args["attack_method"] == "zoo_ae" or args[
                    "attack_method"] == "autozoom_ae":
                #print ae info
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # NOTE(review): the resize chain below appears to apply
                    # only to the non-mnist/cifar branch — confirm against
                    # the original (pre-collapse) indentation.
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success,
                        l2_dist, total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)
            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)
            # diff image
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
if init != None: model.load_weights(init) def fn(correct, predicted): return tf.nn.softmax_cross_entropy_with_logits(labels=correct, logits=predicted / train_temp) sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss=fn, optimizer=sgd, metrics=['accuracy']) model.fit(data.train_data, data.train_labels, batch_size=batch_size, validation_data=(data.validation_data, data.validation_labels), nb_epoch=num_epochs, shuffle=True) if file_name != None: model.save(file_name) return model if not os.path.isdir('models'): os.makedirs('models') train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50) train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50)
model.fit(data.train_data, data.train_labels, batch_size=batch_size, validation_data=(data.validation_data, data.validation_labels), epochs=num_epochs, shuffle=True) # save model to a file if file_name != None: model.save(file_name) return model if not os.path.isdir('models'): os.makedirs('models') if __name__ == "__main__": train(MNIST(), file_name="models/mnist_2layer", params=[1024], num_epochs=50, lr=0.1, decay=1e-3) train(CIFAR(), file_name="models/cifar_2layer", params=[1024], num_epochs=50, lr=0.2, decay=1e-3)
help='number of epochs') parser.add_argument('--overwrite', action='store_true', help='overwrite output file') args = parser.parse_args() print(args) nlayers = len(args.layer_parameters) + 1 if not args.modelfile: file_name = args.modelpath + "/" + args.model + "_" + str( nlayers ) + "layer_" + args.activation + "_" + args.layer_parameters[0] else: file_name = args.modelfile print("Model will be saved to", file_name) if os.path.isfile(file_name) and not args.overwrite: raise RuntimeError("model {} exists.".format(file_name)) if args.model == "mnist": data = MNIST() elif args.model == "cifar": data = CIFAR() train(data, file_name=file_name, params=args.layer_parameters, num_epochs=args.epochs, lr=args.lr, decay=args.wd, activation=args.activation, activation_param=args.leaky_slope, grad_reg=args.gradreg, dropout_rate=args.dropout)
def main(args):
    """Run the hybrid black-box attack (local PGD transfer seeds + AutoZOOM/NES
    bbox attack) against a CIFAR-10 target model, optionally fine-tuning the
    local substitute models with the target model's query responses.

    Args:
        args: dict of experiment options (model types, attack method/type,
            paths, budgets, fine-tuning schedule, etc.).

    Side effects: creates output directories, saves local AEs / scheduling
    info / query statistics to disk, and prints progress throughout.
    """
    if args["model_type"] == "normal":
        load_robust = False
    else:
        load_robust = True
    simple_target_model = args["simple_target_model"]  # if true, target model is simple CIFAR10 model (LeNet)
    simple_local_model = True  # if true, local models are simple CIFAR10 models (LeNet)
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(args["seed"])
    data = CIFAR()
    if not hasattr(K, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")
    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    # FIX: reuse the already-loaded dataset instead of constructing CIFAR()
    # twice more (each construction reloads the whole dataset from disk).
    x_test, y_test = data.test_data, data.test_labels
    all_trans_rate_ls = []  # store transfer rate of all seeds
    remain_trans_rate_ls = []  # store transfer rate of remaining seeds, used only in local model fine-tuning
    # Define input TF placeholders
    class_num = 10
    image_size = 32
    num_channels = 3
    test_batch_size = 100
    x = tf.placeholder(tf.float32,
                       shape=(None, image_size, image_size, num_channels))
    y = tf.placeholder(tf.float32, shape=(None, class_num))
    # required by the local robust densenet model
    is_training = tf.placeholder(tf.bool, shape=[])
    keep_prob = tf.placeholder(tf.float32)
    ########################### load the target model ##########################################
    if not load_robust:
        if simple_target_model:
            target_model_name = 'modelA'
            target_model = cifar10_models_simple(sess,test_batch_size, 0, use_softmax=True,x = x, y = y,\
                load_existing=True,model_name=target_model_name)
        else:
            target_model_name = 'densenet'
            target_model = cifar10_models(sess,0,test_batch_size = test_batch_size,use_softmax=True,x = x, y = y,\
                load_existing=True,model_name=target_model_name)
        accuracy = target_model.calcu_acc(x_test, y_test)
        print('Test accuracy of target model {}: {:.4f}'.format(
            target_model_name, accuracy))
    else:
        if args["robust_type"] == "madry":
            target_model_name = 'madry_robust'
            model_dir = "CIFAR10_models/Robust_Deep_models/Madry_robust_target_model"  # TODO: put your own madry robust target model directory here
            target_model = Load_Madry_Model(sess, model_dir, bias=0.5, scale=255)
        elif args["robust_type"] == "zico":
            # Note: add zico cifar10 model will added in future
            target_model_name = 'zico_robust'
            model_dir = ""  # TODO: put your own robust zico target model directory here
            target_model = Load_Zico_Model(model_dir=model_dir, bias=0.5, scale=255)
        else:
            raise NotImplementedError
        corr_preds = target_model.correct_prediction(x_test, np.argmax(y_test, axis=1))
        print('Test accuracy of target robust model :{:.4f}'.format(
            np.sum(corr_preds) / len(x_test)))
    ##################################### end of load target model ###################################
    local_model_names = args["local_model_names"]
    robust_indx = []          # 1 if the local model at that position is robust, else 0
    normal_local_types = []   # architecture type ids for the simple (normal) local models
    for loc_model_name in local_model_names:
        if loc_model_name == "adv_densenet" or loc_model_name == "adv_vgg" or loc_model_name == "adv_resnet":
            # normal_local_types.append(0)
            robust_indx.append(1)
        else:
            robust_indx.append(0)
            # NOTE(review): the type-id chain below was reconstructed as nested
            # under the non-robust branch — confirm against the original file.
            if loc_model_name == "modelB":
                normal_local_types.append(1)
            elif loc_model_name == "modelD":
                normal_local_types.append(3)
            elif loc_model_name == "modelE":
                normal_local_types.append(4)
    print("robust index: ", robust_indx)
    print("normal model types:", normal_local_types)
    # folder name is the underscore-joined list of local model names
    local_model_folder = ''
    for ii in range(len(local_model_names)):
        if ii != len(local_model_names) - 1:
            local_model_folder += local_model_names[ii] + '_'
        else:
            local_model_folder += local_model_names[ii]
    nb_imgs = args["num_img"]
    # local model attack related params
    clip_min = -0.5
    clip_max = 0.5
    li_eps = args["cost_threshold"]
    alpha = 1.0   # mixup interpolation strength
    k = 100       # PGD iterations
    a = 0.01      # PGD step size
    load_existing = True  # load pretrained local models, if false, random model will be given
    with_local = args["with_local"]  # if true, hybrid attack, otherwise, only baseline attacks
    if args["no_tune_local"]:
        stop_fine_tune_flag = True
        load_existing = True
    else:
        stop_fine_tune_flag = False
    if with_local:
        if load_existing:
            loc_adv = 'adv_with_tune'
            if args["no_tune_local"]:
                loc_adv = 'adv_no_tune'
    else:
        loc_adv = 'orig'
    # target type
    if args["attack_type"] == "targeted":
        is_targeted = True
    else:
        is_targeted = False
    sub_epochs = args["nb_epochs_sub"]  # epochs for local model training
    use_loc_adv_thres = args["use_loc_adv_thres"]  # threshold for transfer attack success rate, it is used when we need to start from local adversarial seeds
    use_loc_adv_flag = True  # flag for using local adversarial examples
    fine_tune_freq = args["fine_tune_freq"]  # fine-tune the model every K images to save total model training time
    # store the attack input files (e.g., original image, target class)
    input_file_prefix = os.path.join(args["local_path"], target_model_name,
                                     args["attack_type"])
    os.system("mkdir -p {}".format(input_file_prefix))
    # save locally generated information
    local_info_file_prefix = os.path.join(args["local_path"], target_model_name,
                                          args["attack_type"], local_model_folder,
                                          str(args["seed"]))
    os.system("mkdir -p {}".format(local_info_file_prefix))
    # attack_input_file_prefix = os.path.join(args["local_path"],target_model_name,
    #                                         args["attack_type"])
    # save bbox attack information
    out_dir_prefix = os.path.join(args["save_path"], args["attack_method"],
                                  target_model_name, args["attack_type"],
                                  local_model_folder, str(args["seed"]))
    os.system("mkdir -p {}".format(out_dir_prefix))
    #### generate the original images and target classes ####
    target_ys_one_hot,orig_images,target_ys,orig_labels,_, trans_test_images = \
        generate_attack_inputs(sess,target_model,x_test,y_test,class_num,nb_imgs,\
        load_imgs=args["load_imgs"],load_robust=load_robust,\
        file_path = input_file_prefix)
    #### end of generating original images and target classes ####
    start_points = np.copy(orig_images)  # either start from orig seed or local advs
    # store attack statistical info
    dist_record = np.zeros(len(orig_labels), dtype=float)
    query_num_vec = np.zeros(len(orig_labels), dtype=int)
    success_vec = np.zeros(len(orig_labels), dtype=bool)
    adv_classes = np.zeros(len(orig_labels), dtype=int)
    # local model related variables
    if simple_target_model:
        local_model_file_name = "cifar10_simple"
    elif load_robust:
        local_model_file_name = "cifar10_robust"
    else:
        local_model_file_name = "cifar10"
    # save_dir = 'model/'+local_model_file_name + '/'
    callbacks_ls = []
    attacked_flag = np.zeros(len(orig_labels), dtype=bool)
    local_model_ls = []
    if with_local:
        ###################### start loading local models ###############################
        local_model_names_all = []  # help to store complete local model names
        sss = 0
        for model_name in local_model_names:
            if model_name == "adv_densenet" or model_name == "adv_vgg" or model_name == "adv_resnet":
                # tensorflow based robust local models
                loc_model = cifar10_tf_robust_models(sess, test_batch_size = test_batch_size, x = x,y = y, is_training=is_training,keep_prob=keep_prob,\
                    load_existing = True, model_name = model_name,loss = args["loss_function"])
                accuracy = loc_model.calcu_acc(x_test, y_test)
                local_model_ls.append(loc_model)
                print('Test accuracy of model {}: {:.4f}'.format(
                    model_name, accuracy))
                sss += 1
            else:
                # keras based local normal models
                if simple_local_model:
                    type_num = normal_local_types[sss]
                if model_name == 'resnet_v1' or model_name == 'resnet_v2':
                    depth_s = [20, 50, 110]
                else:
                    depth_s = [0]
                for depth in depth_s:
                    # model_name used for loading models
                    if model_name == 'resnet_v1' or model_name == 'resnet_v2':
                        model_load_name = model_name + str(depth)
                    else:
                        model_load_name = model_name
                    local_model_names_all.append(model_load_name)
                    if not simple_local_model:
                        loc_model = cifar10_models(sess,depth,test_batch_size = test_batch_size,use_softmax = True, x = x,y = y,\
                            load_existing = load_existing, model_name = model_name,loss = args["loss_function"])
                    else:
                        loc_model = cifar10_models_simple(sess,test_batch_size,type_num,use_softmax = True, x = x,y = y,\
                            is_training=is_training,keep_prob=keep_prob,load_existing = load_existing, model_name = model_name, loss = args["loss_function"])
                    local_model_ls.append(loc_model)
                    opt = keras.optimizers.SGD(lr=0.01,
                                               decay=1e-6,
                                               momentum=0.9,
                                               nesterov=True)
                    loc_model.model.compile(loss='categorical_crossentropy',
                                            optimizer=opt,
                                            metrics=['accuracy'])
                    orig_images_nw = orig_images
                    orig_labels_nw = orig_labels
                    if args["no_save_model"]:
                        if not load_existing:
                            loc_model.model.fit(
                                orig_images_nw, orig_labels_nw,
                                batch_size=args["train_batch_size"],
                                epochs=sub_epochs,
                                verbose=0,
                                validation_data=(x_test, y_test),
                                shuffle=True)
                    else:
                        print(
                            "Saving local model is yet to be implemented, please check back later, system exiting!"
                        )
                        sys.exit(0)
                        # TODO: fix the issue of loading pretrained model first and then finetune the model
                        # if load_existing:
                        #     filepath = save_dir + model_load_name + '_pretrained.h5'
                        # else:
                        #     filepath = save_dir + model_load_name + '.h5'
                        # checkpoint = ModelCheckpoint(filepath=filepath,
                        #                              monitor='val_acc',
                        #                              verbose=0,
                        #                              save_best_only=True)
                        # callbacks = [checkpoint]
                        # callbacks_ls.append(callbacks)
                        # if not load_existing:
                        #     print("Train on %d data and validate on %d data" % (len(orig_labels_nw),len(y_test)))
                        #     loc_model.model.fit(orig_images_nw, orig_labels_nw,
                        #                         batch_size=args["train_batch_size"],
                        #                         epochs=sub_epochs,
                        #                         verbose=0,
                        #                         validation_data=(x_test, y_test),
                        #                         shuffle = True,
                        #                         callbacks = callbacks)
                    scores = loc_model.model.evaluate(x_test, y_test, verbose=0)
                    accuracy = scores[1]
                    print('Test accuracy of model {}: {:.4f}'.format(
                        model_load_name, accuracy))
                    sss += 1
        ##################### end of loading local models ######################################
        ##################### Define Attack Graphs of local PGD attack ###############################
        local_attack_graph = LinfPGDAttack(local_model_ls,
                                           epsilon=li_eps,
                                           k=k,
                                           a=a,
                                           random_start=True,
                                           loss_func=args["loss_function"],
                                           targeted=is_targeted,
                                           robust_indx=robust_indx,
                                           x=x,
                                           y=y,
                                           is_training=is_training,
                                           keep_prob=keep_prob)
        ##################### end of defining graphs of PGD attack ##########################
        ##################### generate local adversarial examples and also store the local attack information #####################
        if not args["load_local_AEs"]:
            # first do the transfer check to obtain local adversarial samples
            # generated local info can be used for batch attacks,
            # max_loss, min_loss, max_gap, min_gap etc are other metrics we explored for scheduling seeds based on local information
            if is_targeted:
                all_trans_rate, pred_labs, local_aes,pgd_cnt_mat, max_loss, min_loss, ave_loss, max_gap, min_gap, ave_gap\
                    = local_attack_in_batches(sess,start_points[np.logical_not(attacked_flag)],\
                    target_ys_one_hot[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                    attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
            else:
                all_trans_rate, pred_labs, local_aes,pgd_cnt_mat, max_loss, min_loss, ave_loss, max_gap, min_gap, ave_gap\
                    = local_attack_in_batches(sess,start_points[np.logical_not(attacked_flag)],\
                    orig_labels[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                    attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
            # calculate local adv loss used for scheduling seeds in batch attack...
            if is_targeted:
                adv_img_loss, free_idx = compute_cw_loss(sess,target_model,local_aes,\
                    target_ys_one_hot,targeted=is_targeted,load_robust=load_robust)
            else:
                adv_img_loss, free_idx = compute_cw_loss(sess,target_model,local_aes,\
                    orig_labels,targeted=is_targeted,load_robust=load_robust)
            # calculate orig img loss for scheduling seeds in baseline attack
            if is_targeted:
                orig_img_loss, free_idx = compute_cw_loss(sess,target_model,orig_images,\
                    target_ys_one_hot,targeted=is_targeted,load_robust=load_robust)
            else:
                orig_img_loss, free_idx = compute_cw_loss(sess,target_model,orig_images,\
                    orig_labels,targeted=is_targeted,load_robust=load_robust)
            pred_labs = np.argmax(target_model.predict_prob(local_aes), axis=1)
            if is_targeted:
                transfer_flag = np.argmax(target_ys_one_hot, axis=1) == pred_labs
            else:
                transfer_flag = np.argmax(orig_labels, axis=1) != pred_labs
            # save local aes
            np.save(local_info_file_prefix + '/local_aes.npy', local_aes)
            # store local info of local aes and original seeds: used for scheduling seeds in batch attacks
            np.savetxt(local_info_file_prefix + '/pgd_cnt_mat.txt', pgd_cnt_mat)
            np.savetxt(local_info_file_prefix + '/orig_img_loss.txt', orig_img_loss)
            np.savetxt(local_info_file_prefix + '/adv_img_loss.txt', adv_img_loss)
            np.savetxt(local_info_file_prefix + '/ave_gap.txt', ave_gap)
        else:
            local_aes = np.load(local_info_file_prefix + '/local_aes.npy')
            if is_targeted:
                tmp_labels = target_ys_one_hot
            else:
                tmp_labels = orig_labels
            pred_labs = np.argmax(target_model.predict_prob(np.array(local_aes)),
                                  axis=1)
            print('correct number',
                  np.sum(pred_labs == np.argmax(tmp_labels, axis=1)))
            all_trans_rate = accuracy_score(np.argmax(tmp_labels, axis=1), pred_labs)
        ################################ end of generating local AEs and storing related information #######################################
        if not is_targeted:
            # untargeted: transfer rate is the misclassification rate
            all_trans_rate = 1 - all_trans_rate
        print('** Transfer Rate: **' + str(all_trans_rate))
        if all_trans_rate > use_loc_adv_thres:
            print("Updated the starting points to local AEs....")
            start_points[np.logical_not(attacked_flag)] = local_aes
            use_loc_adv_flag = True
        # independent test set for checking transferability: for experiment purpose and does not count for query numbers
        if is_targeted:
            ind_all_trans_rate,_,_,_,_,_,_,_,_,_ = local_attack_in_batches(sess,trans_test_images,target_ys_one_hot,eval_batch_size = test_batch_size,\
                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
        else:
            ind_all_trans_rate,_,_,_,_,_,_,_,_,_ = local_attack_in_batches(sess,trans_test_images,orig_labels,eval_batch_size = test_batch_size,\
                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
        # record the queries spent by querying the local samples
        query_num_vec[np.logical_not(attacked_flag)] += 1
        if not is_targeted:
            ind_all_trans_rate = 1 - ind_all_trans_rate
        print('** (Independent Set) Transfer Rate: **' + str(ind_all_trans_rate))
        all_trans_rate_ls.append(ind_all_trans_rate)
        # S is the substitute-training set, labeled by the target model
        S = np.copy(start_points)
        S_label = target_model.predict_prob(S)
        S_label_cate = np.argmax(S_label, axis=1)
        S_label_cate = np_utils.to_categorical(S_label_cate, class_num)
    pre_free_idx = []
    candi_idx_ls = []  # store the indices of images in the order attacked
    # these parameters are used to make sure equal number of instances from each class are selected
    # such that diversity of fine-tuning set is improved. However, it is not effective...
    per_cls_cnt = 0
    cls_order = 0
    change_limit = False
    max_lim_num = int(fine_tune_freq / class_num)
    # define the autozoom bbox attack graph
    if args["attack_method"] == "autozoom":
        # setup the autoencoders for autozoom attack
        codec = 0
        args["img_resize"] = 8
        # replace with your directory
        codec_dir = 'CIFAR10_models/cifar10_autoencoder/'  # TODO: replace with your own cifar10 autoencoder directory
        encoder = load_model(codec_dir + 'whole_cifar10_encoder.h5')
        decoder = load_model(codec_dir + 'whole_cifar10_decoder.h5')
        # quick sanity check of the codec reconstruction quality
        encode_img = encoder.predict(data.test_data[100:101])
        decode_img = decoder.predict(encode_img)
        diff_img = (decode_img - data.test_data[100:101])
        diff_mse = np.mean(diff_img.reshape(-1)**2)
        # diff_mse = np.mean(np.sum(diff_img.reshape(-1,784)**2,axis = 1))
        print("[Info][AE] MSE:{:.4f}".format(diff_mse))
        encode_img = encoder.predict(data.test_data[0:1])
        decode_img = decoder.predict(encode_img)
        diff_img = (decode_img - data.test_data[0:1])
        diff_mse = np.mean(diff_img.reshape(-1)**2)
        print("[Info][AE] MSE:{:.4f}".format(diff_mse))
    if args["attack_method"] == "autozoom":
        # define black-box model graph of autozoom
        autozoom_graph = AutoZOOM(sess, target_model, args, decoder, codec,
                                  num_channels, image_size, class_num)
    # main loop of hybrid attacks
    for itr in range(len(orig_labels)):
        print("#------------ Substitue training round {} ----------------#".
              format(itr))
        # compute loss functions of seeds: no query is needed here because seeds are already queried before...
        if is_targeted:
            img_loss, free_idx = compute_cw_loss(sess,target_model,start_points,\
                target_ys_one_hot,targeted=is_targeted,load_robust=load_robust)
        else:
            img_loss, free_idx = compute_cw_loss(sess,target_model,start_points,\
                orig_labels,targeted=is_targeted,load_robust=load_robust)
        free_idx_diff = list(set(free_idx) - set(pre_free_idx))
        print("new free idx found:", free_idx_diff)
        if len(free_idx_diff) > 0:
            candi_idx_ls.extend(free_idx_diff)
        pre_free_idx = free_idx
        if with_local:
            if len(free_idx) > 0:
                # free attacks are found
                attacked_flag[free_idx] = 1
                success_vec[free_idx] = 1
                # update dist and adv class
                if args['dist_metric'] == 'l2':
                    dist = np.sum(
                        (start_points[free_idx] - orig_images[free_idx])**2,
                        axis=(1, 2, 3))**.5
                elif args['dist_metric'] == 'li':
                    dist = np.amax(np.abs(start_points[free_idx] -
                                          orig_images[free_idx]),
                                   axis=(1, 2, 3))
                # print(start_points[free_idx].shape)
                adv_class = target_model.pred_class(start_points[free_idx])
                adv_classes[free_idx] = adv_class
                dist_record[free_idx] = dist
                if np.amax(dist) >= args["cost_threshold"] + args["cost_threshold"] / 10:
                    print(
                        "there are some problems in setting the perturbation distance!"
                    )
                    sys.exit(0)
        print("Number of Unattacked Seeds: ",
              np.sum(np.logical_not(attacked_flag)))
        if attacked_flag.all():
            # early stop when all seeds are successfully attacked
            break
        # define the seed generation process as a function
        if args["sort_metric"] == "min":
            img_loss[attacked_flag] = 1e10
        elif args["sort_metric"] == "max":
            img_loss[attacked_flag] = -1e10
        candi_idx, per_cls_cnt, cls_order,change_limit,max_lim_num = select_next_seed(img_loss,attacked_flag,args["sort_metric"],\
            args["by_class"],fine_tune_freq,class_num,per_cls_cnt,cls_order,change_limit,max_lim_num)
        print(candi_idx)
        candi_idx_ls.append(candi_idx)
        input_img = start_points[candi_idx:candi_idx + 1]
        if args["attack_method"] == "autozoom":
            # encoder decoder performance check
            encode_img = encoder.predict(input_img)
            decode_img = decoder.predict(encode_img)
            diff_img = (decode_img - input_img)
            diff_mse = np.mean(diff_img.reshape(-1)**2)
        else:
            diff_mse = 0.0
        print("[Info][Start]: test_index:{}, true label:{}, target label:{}, MSE:{}".format(candi_idx, np.argmax(orig_labels[candi_idx]),\
            np.argmax(target_ys_one_hot[candi_idx]),diff_mse))
        ################## BEGIN: bbox attacks ############################
        if args["attack_method"] == "autozoom":
            # perform bbox attacks
            if is_targeted:
                x_s, ae, query_num = autozoom_attack(
                    autozoom_graph, input_img,
                    orig_images[candi_idx:candi_idx + 1],
                    target_ys_one_hot[candi_idx])
            else:
                x_s, ae, query_num = autozoom_attack(
                    autozoom_graph, input_img,
                    orig_images[candi_idx:candi_idx + 1],
                    orig_labels[candi_idx])
        else:
            if is_targeted:
                x_s, query_num, ae = nes_attack(args,target_model,input_img,orig_images[candi_idx:candi_idx+1],\
                    np.argmax(target_ys_one_hot[candi_idx]), lower = clip_min, upper = clip_max)
            else:
                x_s, query_num, ae = nes_attack(args,target_model,input_img,orig_images[candi_idx:candi_idx+1],\
                    np.argmax(orig_labels[candi_idx]), lower = clip_min, upper = clip_max)
            x_s = np.squeeze(np.array(x_s), axis=1)
        ################## END: bbox attacks ############################
        attacked_flag[candi_idx] = 1
        # fill the query info, etc
        if len(ae.shape) == 3:
            ae = np.expand_dims(ae, axis=0)
        if args['dist_metric'] == 'l2':
            dist = np.sum((ae - orig_images[candi_idx])**2)**.5
        elif args['dist_metric'] == 'li':
            dist = np.amax(np.abs(ae - orig_images[candi_idx]))
        adv_class = target_model.pred_class(ae)
        adv_classes[candi_idx] = adv_class
        dist_record[candi_idx] = dist
        if args["attack_method"] == "autozoom":
            # autozoom utilizes the query info of attack input, which is already done at the beginning.
            added_query = query_num - 1
        else:
            added_query = query_num
        query_num_vec[candi_idx] += added_query
        if dist >= args["cost_threshold"] + args["cost_threshold"] / 10:
            print("the distance is not optimized properly")
            sys.exit(0)
        if is_targeted:
            if adv_class == np.argmax(target_ys_one_hot[candi_idx]):
                success_vec[candi_idx] = 1
        else:
            if adv_class != np.argmax(orig_labels[candi_idx]):
                success_vec[candi_idx] = 1
        if attacked_flag.all():
            print(
                "Early termination because all seeds are successfully attacked!"
            )
            break
        ##############################################################
        ## Starts the section of substitute training and local advs ##
        ##############################################################
        if with_local:
            if not stop_fine_tune_flag:
                # augment the local model training data with target model labels
                print(np.array(x_s).shape)
                print(S.shape)
                S = np.concatenate((S, np.array(x_s)), axis=0)
                S_label_add = target_model.predict_prob(np.array(x_s))
                S_label_add_cate = np.argmax(S_label_add, axis=1)
                S_label_add_cate = np_utils.to_categorical(S_label_add_cate,
                                                           class_num)
                S_label_cate = np.concatenate((S_label_cate, S_label_add_cate),
                                              axis=0)
                # empirically, tuning with model prediction probabilities given slightly better results.
                # if your bbox attack is decision based, only use the prediction labels
                S_label = np.concatenate((S_label, S_label_add), axis=0)
                # fine-tune the model
                if itr % fine_tune_freq == 0 and itr != 0:
                    if len(S_label) > args["train_inst_lim"]:
                        curr_len = len(S_label)
                        rand_idx = np.random.choice(len(S_label),
                                                    args["train_inst_lim"],
                                                    replace=False)
                        S = S[rand_idx]
                        S_label = S_label[rand_idx]
                        S_label_cate = S_label_cate[rand_idx]
                        print(
                            "current num: %d, max train instance limit %d is reached, performed random sampling to get %d samples!"
                            % (curr_len, len(S_label), len(rand_idx)))
                    sss = 0
                    for loc_model in local_model_ls:
                        model_name = local_model_names_all[sss]
                        if args["use_mixup"]:
                            print(
                                "Updates the training data with mixup strayegy!"
                            )
                            S_nw = np.copy(S)
                            S_label_nw = np.copy(S_label)
                            S_nw, S_label_nw, _ = mixup_data(S_nw, S_label_nw,
                                                             alpha=alpha)
                        else:
                            S_nw = S
                            S_label_nw = S_label
                        print("Train on %d data and validate on %d data" %
                              (len(S_label_nw), len(y_test)))
                        if args["no_save_model"]:
                            loc_model.model.fit(
                                S_nw, S_label_nw,
                                batch_size=args["train_batch_size"],
                                epochs=sub_epochs,
                                verbose=0,
                                validation_data=(x_test, y_test),
                                shuffle=True)
                        else:
                            print(
                                "Saving local model is yet to be implemented, please check back later, system exiting!"
                            )
                            sys.exit(0)
                            # callbacks = callbacks_ls[sss]
                            # loc_model.model.fit(S_nw, S_label_nw,
                            #                     batch_size=args["train_batch_size"],
                            #                     epochs=sub_epochs,
                            #                     verbose=0,
                            #                     validation_data=(x_test, y_test),
                            #                     shuffle = True,
                            #                     callbacks = callbacks)
                        scores = loc_model.model.evaluate(x_test, y_test,
                                                          verbose=0)
                        print('Test accuracy of model {}: {:.4f}'.format(
                            model_name, scores[1]))
                        sss += 1
                    if not attacked_flag.all():
                        # first check for not attacked seeds
                        if is_targeted:
                            remain_trans_rate, _, remain_local_aes,_, _, _, _, _, _, _\
                                = local_attack_in_batches(sess,orig_images[np.logical_not(attacked_flag)],\
                                target_ys_one_hot[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        else:
                            remain_trans_rate, pred_labs, remain_local_aes,_, _, _, _, _, _, _\
                                = local_attack_in_batches(sess,orig_images[np.logical_not(attacked_flag)],\
                                orig_labels[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        if not is_targeted:
                            remain_trans_rate = 1 - remain_trans_rate
                        print('<<Ramaining Seed Transfer Rate>>:**' +
                              str(remain_trans_rate))
                        # if transfer rate is higher than threshold, use local advs as starting points
                        if remain_trans_rate <= 0 and use_loc_adv_flag:
                            print(
                                "No improvement for substitue training, stop fine-tuning!"
                            )
                            # FIX: was `stop_fine_tune_flag = False`, a dead
                            # assignment (the flag is already False here), so
                            # fine-tuning could never actually stop despite the
                            # message. Setting True makes the early-stop work.
                            stop_fine_tune_flag = True
                        # transfer rate check with independent test examples
                        if is_targeted:
                            all_trans_rate, _, _, _, _, _, _, _, _, _\
                                = local_attack_in_batches(sess,trans_test_images,target_ys_one_hot,eval_batch_size = test_batch_size,\
                                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        else:
                            all_trans_rate, _, _, _, _, _, _, _, _, _\
                                = local_attack_in_batches(sess,trans_test_images,orig_labels,eval_batch_size = test_batch_size,\
                                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        if not is_targeted:
                            all_trans_rate = 1 - all_trans_rate
                        print('<<Overall Transfer Rate>>: **' + str(all_trans_rate))
                        # if trans rate is not high enough, still start from orig seed; start from loc adv only
                        # when trans rate is high enough, useful when you start with random model
                        if not use_loc_adv_flag:
                            if remain_trans_rate > use_loc_adv_thres:
                                use_loc_adv_flag = True
                                print("Updated the starting points....")
                                start_points[np.logical_not(
                                    attacked_flag)] = remain_local_aes
                                # record the queries spent on checking newly generated loc advs
                                query_num_vec += 1
                        else:
                            print("Updated the starting points....")
                            start_points[np.logical_not(
                                attacked_flag)] = remain_local_aes
                            # record the queries spent on checking newly generated loc advs
                            query_num_vec[np.logical_not(attacked_flag)] += 1
                        remain_trans_rate_ls.append(remain_trans_rate)
                        all_trans_rate_ls.append(all_trans_rate)
    np.set_printoptions(precision=4)
    print("all_trans_rate:")
    print(all_trans_rate_ls)
    print("remain_trans_rate")
    print(remain_trans_rate_ls)
    # save the query information of all classes
    if not args["no_save_text"]:
        save_name_file = os.path.join(out_dir_prefix,
                                      "{}_num_queries.txt".format(loc_adv))
        np.savetxt(save_name_file, query_num_vec, fmt='%d', delimiter=' ')
        save_name_file = os.path.join(out_dir_prefix,
                                      "{}_success_flags.txt".format(loc_adv))
        np.savetxt(save_name_file, success_vec, fmt='%d', delimiter=' ')
def main(args):
    """Run the selected attack (C&W L2, EAD L1/EN, or FGM/IFGM variants) on the
    chosen dataset/model, then report best/average/worst-case success rates and
    L1/L2/Linf distortions per batch.

    Args:
        args: dict of CLI options — 'dataset', 'attack', 'batch_size',
            'numimg', 'maxiter', 'conf', 'binary_steps', 'beta',
            'abort_early', 'train', 'show', 'save', 'temp', 'adversarial',
            'seed', 'seed_imagenet'.

    Side effects: may save .npy outputs (train mode) and PNG visualizations
    (show mode); prints aggregate statistics.
    """
    with tf.Session() as sess:
        # select dataset + matching model; handpick/inception steer data generation
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # adversarially-trained / distilled variants override the default model
        # NOTE(review): the 'adversarial' branch loads an MNIST model regardless
        # of the selected dataset — presumably only used with MNIST; confirm.
        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']), sess)
        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])

        timestart = time.time()
        # exactly one of the following branches runs, selected by args['attack']
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        """If untargeted, pass labels instead of targets"""
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        # train mode: only dump adversarial examples + labels and return
        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        # per-batch statistics accumulators (1/0 success flags + distortions)
        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # NOTE(review): nonzero confidence switches evaluation to the distilled
        # model (transferability measurement) — confirm this is intentional
        # for non-MNIST datasets as well.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        # each batch contains all target classes for one source image
        for i in range(0, len(inputs), args['batch_size']):
            # predictions of the target model on each adversarial example
            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            # best case: minimum distortion among successful targets
            # (1e10 doubles as "not found" sentinel for both dist and index)
            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] - inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            # average case: one uniformly random target from this batch
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r, 1) == np.argmax(targets[rand_int:rand_int + 1],
                                                  1)):
                r_average.append(1)
                d_average_l2.append(
                    np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(
                    np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(
                    np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
            else:
                r_average.append(0)

            # worst case: maximum distortion, counted only if EVERY target
            # succeeded (any failure breaks out and records 0)
            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] - inputs[dist_linf_index])))
                r_worst.append(1)

            # optionally dump original/adversarial image pairs with a
            # metadata-rich filename suffix
            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id, adv_id,
                        adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))
                    show(
                        inputs[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) + "/original_{}.png".format(suffix))
                    show(
                        adv[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/adversarial_{}.png".format(suffix))

        # aggregate statistics across all batches
        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
# now train the teacher at the given temperature teacher = train(data, file_name+"_teacher", params, num_epochs, batch_size, train_temp, init=file_name+"_init") # evaluate the labels at temperature t predicted = teacher.predict(data.train_data) y = tf.nn.softmax(predicted/train_temp) print(y) data.train_labels = y # train the student model at temperature t student = train(data, file_name, params, num_epochs, batch_size, train_temp, init=file_name+"_init") # and finally we predict at temperature 1 predicted = student.predict(data.train_data) print(predicted) if not os.path.isdir('models'): os.makedirs('models') train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50) train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50) train_distillation(MNIST(), "models/mnist-distilled-100", [32, 32, 64, 64, 200, 200], num_epochs=50, train_temp=100) train_distillation(CIFAR(), "models/cifar-distilled-100", [64, 64, 128, 128, 256, 256], num_epochs=50, train_temp=100)
"""Measure clean test accuracy of the wide-ResNet CIFAR model on the
encoder-transformed test set, one example at a time."""
from setup_cifar import CIFAR, CIFARModel, CIFAR_WIDE
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

from encoder import encoder

# Evaluation batch size; predictions are run this many examples at a time.
BATCH_SIZE = 1

with tf.Session() as sess:
    # Alternative dataset/model pairs (MNIST, plain CIFAR, ImageNet/Inception)
    # can be substituted here; this script evaluates the wide-resnet weights.
    data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess)

    # Input placeholder shaped from the model's own image geometry.
    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    hits = []
    total = len(data.encoding_test_data)
    for start in range(0, total, BATCH_SIZE):
        stop = start + BATCH_SIZE
        scores = sess.run(y, {x: data.encoding_test_data[start:stop]})
        predicted = np.argmax(scores, 1)
        # NOTE(review): labels come from data.test_labels while inputs come
        # from data.encoding_test_data — presumably aligned index-for-index;
        # confirm against the encoder pipeline.
        actual = np.argmax(data.test_labels[start:stop], 1)
        hits.append(predicted == actual)
    # Mean over per-example boolean matches == overall accuracy.
    print(np.mean(hits))
def main(args):
    """Craft adversarial examples and report attack statistics.

    Loads the dataset/model pair named by ``args['dataset']`` (optionally an
    adversarially-trained or defensively-distilled variant), generates or
    restores adversarial examples with the attack named by ``args['attack']``,
    optionally saves/visualizes them, and prints best-case / average-case /
    worst-case success probability and L1/L2/Linf distortion statistics —
    once per model when a transfer target model is requested.

    args: dict of command-line options (dataset, attack, numimg, targetnum,
    untargeted, batch_size, solver constants, save/show/restore flags, ...).
    """
    with tf.Session() as sess:
        # ---- dataset / model selection -------------------------------------
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                # Adversarially (re)trained MNIST model variant.
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                # Defensively-distilled model trained at temperature `temp`.
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            # ImageNet loads 2*numimg candidates so enough correctly-classified
            # seeds remain after filtering — TODO confirm against ImageNet().
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True
        # inputs/targets are grouped: `targetnum` target classes per source
        # image when targeted, 1 when untargeted.
        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        # ---- obtain adversarial examples: restore from disk or attack ------
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            # Optimization-based attacks (C&W L2, EAD L1, EAD elastic-net):
            # all share the same solver hyperparameters from `args`.
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            # Gradient-sign attacks: FGM/IFGM under Linf, L1, or L2 norms.
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
        timeend = time.time()
        # Untargeted runs have one "target" (the true label) per instance.
        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")
        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                # NOTE(review): parens differ from the load path —
                # str(args['attack'] + '.npy') — but since attack is a str the
                # resulting filename is identical; left as-is.
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)
        # Underscore-suffixed accumulators: declared but never appended to in
        # this function (per-model stats below use the non-underscore names).
        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []
        # ---- transferability: evaluate on source model, then (optionally)
        # a second target model --------------------------------------------
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)
        if (args['show']):
            # Ensure the <save>/<dataset>/<attack> output directory exists.
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))
        # NOTE: `model` is rebound by this loop, shadowing the source model.
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            # Iterate one source image (group of num_targets targets) at a time.
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        # Inception logits need reshaping to label shape.
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))
                # ---- best case: smallest distortion among successful targets.
                # 1e10 doubles as "no success yet" sentinel for the indices.
                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        # Untargeted success: prediction moved off the label.
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        # Targeted success: prediction hit the target class.
                        if (np.argmax(pred[k], 1) == np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        # Track the minimum distortion per norm independently.
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    # At least one target succeeded: record its distortions.
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)
                # ---- average case: one uniformly random target from the group.
                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))
                else:
                    r_average.append(0)
                # ---- worst case: every target must succeed; report the
                # largest distortion. A single failure zeroes the group.
                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k], 1) == np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        # Reset sentinels so nothing is recorded below.
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        # Track the maximum distortion per norm independently.
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)
                # ---- optional image dump, only for the last (target) model.
                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        # NOTE(review): uses true_ids[i] (the group's index),
                        # not a per-j id — presumably one id per source image;
                        # confirm against generate_data.
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))
                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))
            # ---- per-model report: "Src_" for intermediate (source) models,
            # "Tgt_" for the final model in the transfer chain.
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                # Single-target run: best/average/worst coincide; report the
                # average-case numbers only.
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))