def make_fgsm(sess, env, X_data, y_data, epochs=1, eps=0.01, batch_size=128):
    """Generate FGSM adversarial examples by running env.x_fgsm."""
    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    X_adv = np.empty_like(X_data)

    with tf.variable_scope('model', reuse=True):
        env.x_fgsm = fgsm(simple_mlp, env.x,
                          epochs=env.fgsm_epochs, eps=env.fgsm_eps)

    for batch in range(n_batch):
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        adv = sess.run(env.x_fgsm, feed_dict={env.x: X_data[start:end],
                                              env.fgsm_eps: eps,
                                              env.fgsm_epochs: epochs})
        X_adv[start:end] = adv

    return X_adv, y_data
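# A minimal sketch, assuming TF1 graph mode, of the symbolic `fgsm` helper
# that make_fgsm relies on; the real helper lives elsewhere, so the body
# below is an assumption (including that simple_mlp returns logits). It runs
# `epochs` signed-gradient steps inside the graph, so both eps and epochs
# can be fed as placeholders at run time.
def fgsm(model, x, epochs=1, eps=0.01, clip_min=0.0, clip_max=1.0):
    def cond(x_adv, i):
        return tf.less(i, tf.cast(epochs, tf.int32))

    def body(x_adv, i):
        logits = model(x_adv)
        # Untargeted attack: move away from the currently predicted class.
        target = tf.stop_gradient(tf.nn.softmax(logits))
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=target,
                                                          logits=logits)
        grad, = tf.gradients(loss, x_adv)
        x_adv = tf.clip_by_value(x_adv + eps * tf.sign(grad),
                                 clip_min, clip_max)
        return x_adv, i + 1

    x_adv, _ = tf.while_loop(cond, body, (tf.identity(x), tf.constant(0)))
    return x_adv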
def defense_wrapper(model, criterion, X, defense,
                    epsilon=None, step_size=None, num_iter=None):
    model.aux = True
    # Each defense inverts the attack objective by negating the criterion.
    if defense == 'fgsm':
        inv_delta = fgsm(model, lambda model, X: -criterion(model, X), X,
                         epsilon=epsilon)
    elif defense == 'pgd_linf':
        inv_delta = pgd_linf(model, lambda model, X: -criterion(model, X), X,
                             epsilon=epsilon, step_size=step_size,
                             num_iter=num_iter)
    elif defense == 'inject_noise':
        inv_delta = inject_noise(X, epsilon)
    else:
        raise ValueError("Unrecognized defense name: {}".format(defense))
    model.aux = False
    # model.eval()
    return inv_delta
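# Hedged sketch of the attack primitives defense_wrapper relies on; the real
# fgsm/pgd_linf/inject_noise are defined elsewhere, so the signatures below
# are assumptions. Each returns a perturbation delta, not the perturbed
# input; pgd_linf would iterate the same signed step with projection.
import torch

def fgsm(model, loss_fn, X, epsilon):
    delta = torch.zeros_like(X, requires_grad=True)
    loss = loss_fn(model, X + delta)
    loss.backward()
    # One signed ascent step on the (possibly negated) criterion.
    return epsilon * delta.grad.detach().sign()

def inject_noise(X, epsilon):
    # Uniform noise in [-epsilon, epsilon], same shape as the input.
    return (torch.rand_like(X) * 2 - 1) * epsilon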
def main():
    # Set up the model and dataset.
    model = MLP(args.unit, 10)
    chainer.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()
    xp = chainer.cuda.get_array_module(model)
    chainer.serializers.load_npz(args.model, model)
    _, test_mnist = chainer.datasets.get_mnist()

    # Fast Gradient Sign Method (simple)
    images = sample(test_mnist, N_gen)
    adv_images, adv_filter = fgsm(model, images, eps=0.2)
    prob = F.softmax(model(adv_images), axis=1).data
    visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size, 'fgsm.png')
    visualize(cupy.asnumpy(adv_filter), cupy.asnumpy(prob), img_size, 'fgsm_filter.png')
def sample(dataset, n_samples):
    images, _ = dataset[np.random.choice(len(dataset), n_samples)]
    images = chainer.cuda.to_gpu(images, args.gpu)
    return images

# Set up the model and dataset.
model = MLP(args.unit, 10)
chainer.cuda.get_device_from_id(args.gpu).use()
model.to_gpu()
xp = chainer.cuda.get_array_module(model)
chainer.serializers.load_npz(args.model, model)
_, test_mnist = chainer.datasets.get_mnist()

# Fast Gradient Sign Method (simple)
images = sample(test_mnist, N_gen)
adv_images = fgsm(model, images, eps=0.2)
prob = F.softmax(model(adv_images), axis=1).data
visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size, 'fgsm.png')

# Fast Gradient Sign Method (iterative)
images = sample(test_mnist, N_gen)
adv_images = fgsm(model, images, eps=0.01, iterations=20)
prob = F.softmax(model(adv_images), axis=1).data
visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size, 'fgsm_iterative.png')

# Targeted Gradient Sign Method (least-likely class)
images = sample(test_mnist, N_gen)
adv_images = tgsm(model, images, eps=0.15)
prob = F.softmax(model(adv_images), axis=1).data
visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size, 'tgsm.png')
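# Hedged sketch of the Chainer-side fgsm helper called above; the real
# implementation (and the tgsm variant) is defined elsewhere, so the body is
# an assumption. fgsm ascends the loss on the currently predicted label;
# tgsm would instead descend the loss toward the least-likely class.
def fgsm(model, images, eps=0.2, iterations=1):
    xp = chainer.cuda.get_array_module(images)
    adv_images = images
    for _ in range(iterations):
        x = chainer.Variable(adv_images)
        logits = model(x)
        labels = F.argmax(logits, axis=1).data.astype(xp.int32)
        loss = F.softmax_cross_entropy(logits, labels)
        loss.backward()
        # One signed step per iteration, clipped to the valid pixel range.
        adv_images = xp.clip(x.data + eps * xp.sign(x.grad), 0.0, 1.0)
    return adv_images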
# Score the classifier's accuracy under each type of attack so the results
# can be compared afterwards.

# First compute gradients.
grad = gradients(W, b, X, Y)
Y = np.argmax(Y, axis=1)

# 0. Original examples (not an attack!)
Y_hat_original = np.argmax(forward(W, b, X), axis=1)
score = evaluate(Y, Y_hat_original)
print("[original]\tAccuracy {}%".format(score))
print(Y_hat_original)

# 1. Fast gradient sign method (FGSM)
X_fgsm = fgsm(X, grad["dX"], 2 * EPSILON)
Y_hat_fgsm = np.argmax(forward(W, b, X_fgsm), axis=1)
score = evaluate(Y, Y_hat_fgsm)
print("[    FGSM]\tAccuracy {}%".format(score))
print(Y_hat_fgsm)

# 2. Targeted fast gradient sign method (T-FGSM)
Y_false = generate_false_labels(Y)
X_tfgsm = targeted_fgsm(X, grad["dX"], 2 * EPSILON)
Y_hat_tfgsm = np.argmax(forward(W, b, X_tfgsm), axis=1)
score = evaluate(Y, Y_hat_tfgsm)
print("[  T-FGSM]\tAccuracy {}%".format(score))
print(Y_hat_tfgsm)

# 3. Iterative fast gradient sign method (I-FGSM)
X_ifgsm = iterative_fgsm(X, grad["dX"], 10, 2 * EPSILON)[-1]
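# Hedged sketch of the NumPy attack helpers called above; the originals live
# elsewhere in the repo, so these bodies are assumptions matched to the call
# sites: fgsm ascends the loss gradient, targeted_fgsm descends it, and
# iterative_fgsm returns the list of iterates (hence the [-1] above). A full
# I-FGSM would recompute the gradient at every iterate rather than reuse dX.
def fgsm(X, dX, epsilon):
    return X + epsilon * np.sign(dX)

def targeted_fgsm(X, dX, epsilon):
    return X - epsilon * np.sign(dX)

def iterative_fgsm(X, dX, num_steps, epsilon):
    steps, X_adv = [], X
    for _ in range(num_steps):
        X_adv = X_adv + (epsilon / num_steps) * np.sign(dX)
        steps.append(X_adv)
    return steps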
adversarial_examples = []
false_negative = failure = success = 0
distortions = []

# Deduplicate malware samples, keeping their multiplicities.
xmals, counts = np.unique(x_malware, axis=0, return_counts=True)
for i in range(xmals.shape[0]):
    count = counts[i]
    xmal = xmals[i]
    if target_model.model.predict(xmal.reshape(1, -1)) == 0:
        # Already misclassified as benign: a false negative, no attack needed.
        false_negative += count
    else:
        xmal = torch.from_numpy(xmal).float().cuda()
        result = attacks.fgsm(feature, xmal.unsqueeze(0),
                              target_model.model.predict(
                                  xmal.unsqueeze(0).cpu().detach().numpy()),
                              sarogate_model, nn.BCELoss(), eps=1)
        distortion = torch.sum(result - xmal)
        if target_model.model.predict(result.cpu().detach().numpy()) == 1:
            # Still detected as malware: the attack failed.
            failure += count
        else:
            success += count
            for _ in range(count):
                distortions.append(distortion)
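# Hedged sketch of the kind of constrained FGSM step attacks.fgsm is assumed
# to perform above: ascend the surrogate's loss, but keep the perturbation
# additive and binary so malware features are only ever switched on, never
# removed. The name and exact signature are assumptions, not the repo's API.
def fgsm_malware(x, y, surrogate, loss_fn, eps=1.0):
    x_adv = x.clone().detach().requires_grad_(True)
    y = torch.as_tensor(y, dtype=torch.float32, device=x.device).view(-1)
    loss = loss_fn(surrogate(x_adv).view(-1), y)
    loss.backward()
    # Keep only feature additions, then re-binarize.
    delta = (eps * x_adv.grad.sign()).clamp(min=0)
    return (x + delta).clamp(0, 1).round().detach()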
def get_adversarial(sess_trained, x, probs, image):
    x_adv = fgsm(x=x, predictions=probs, eps=0.3, clip_max=1.0, clip_min=-1.0)
    img_adv = sess_trained.run(x_adv, feed_dict={x: image})
    return img_adv
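# Hedged usage sketch: fgsm here follows the cleverhans-v1 style symbolic
# interface (placeholder in, adversarial tensor out). The MNIST-shaped
# placeholder, `model`, and `image_batch` below are illustrative assumptions.
x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
probs = model(x)  # softmax output of the trained model
img_adv = get_adversarial(sess_trained, x, probs, image_batch)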
def main(_):
    FLAGS = flags.FLAGS
    if FLAGS.param_constructor == 'EmpiricalPrior':
        param_constructor = EmpiricalPrior
    elif FLAGS.param_constructor == 'SimpleMNISTParams':
        param_constructor = SimpleMNISTParams
    elif FLAGS.param_constructor == 'PaperParams':
        param_constructor = PaperParams
    else:
        print('Unsupported parameter struct specified')
        return

    attack_name = FLAGS.adv_attack_name.format(FLAGS.epsilon, FLAGS.norm_type)
    # Directory where the adv kernels and adv-specific graphs will go.
    adv_output_dir = os.path.join(FLAGS.adv_output, attack_name)
    # Directory where the adv dataset is/will be.
    adv_data_dir = os.path.join(FLAGS.adv_data, attack_name)
    # Filename under which the attack is saved when generated, or from which
    # it is loaded if it already exists.
    adv_data_file = FLAGS.adv_data_file.format(attack_name)

    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    kernels_dir = os.path.join(FLAGS.output_dir, "kernels")
    if not os.path.exists(kernels_dir):
        os.makedirs(kernels_dir)
        print("Directory", kernels_dir, "created")

    # Load all the data (train, test, val).
    X, Y, Xv, Yv, Xt, Yt = dataset.mnist_sevens_vs_twos(FLAGS.data_path, noisy=True)

    # Parameters for the GP.
    params = param_constructor(FLAGS.seed)
    params = verify_params(params)
    pu.dump(params, path.join(FLAGS.output_dir, 'params.pkl.gz'))

    # Create the GP.
    with tf.device("GPU:0"):
        kern = ck.create_kern(params)

    # Compute the training kernel and its inverse if they don't exist yet;
    # otherwise load them. Classification is treated as a regression problem
    # (the conjugate method), so the inverse of the training kernel is all
    # we need.
    Kxx = initialize_kernel("Kxx", X, None, False, kern, kernels_dir)
    K_inv = initialize_Kxx_inverse(kernels_dir)
    # Don't center the labels; use one-hot vectors as probabilities.
    K_inv_Y = K_inv @ Y

    if not FLAGS.adv_only:
        classify('test', Xt, Yt, 't', X, K_inv, K_inv_Y, kern,
                 kernels_dir, FLAGS.output_dir)
        classify('validation', Xv, Yv, 'v', X, K_inv, K_inv_Y, kern,
                 kernels_dir, FLAGS.output_dir)

    adv_kernels_dir = os.path.join(adv_output_dir, "kernels")
    if not os.path.exists(adv_kernels_dir):
        os.makedirs(adv_kernels_dir)
        print("Directory", adv_kernels_dir, "created")

    # From this point on, only the inverse of the training kernel is needed.
    # Generate the attack and save the adversarial examples.
    if FLAGS.generate_attack:
        print('Generating attack')
        remove_kernels('a', adv_kernels_dir)
        if FLAGS.attack == 'fgsm':
            Xa = attacks.fgsm(K_inv_Y, kern, X, Xt, Yt, seed=FLAGS.seed,
                              epsilon=FLAGS.epsilon, norm_type=FLAGS.norm_type,
                              output_images=True, max_output=128,
                              output_path=adv_data_dir,
                              adv_file_output=adv_data_file)
        else:
            Xa = attacks.fgsm_cleverhans(K_inv_Y, kern, X, Xt, Yt,
                                         epsilon=FLAGS.epsilon,
                                         norm_type=FLAGS.norm_type,
                                         output_images=True, max_output=128,
                                         output_path=adv_data_dir,
                                         adv_file_output=adv_data_file)
    else:
        print('Loading attack')
        Xa = np.load(path.join(adv_data_dir, adv_data_file))

    # Compute adversarial kernels and the resulting error.
    classify('adv', Xa, Yt, 'a', X, K_inv, K_inv_Y, kern,
             adv_kernels_dir, adv_output_dir)
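# Hedged sketch of an FGSM step against the GP posterior mean used above:
# predictions are mean(x*) = K(x*, X) @ K_inv_Y, so the attack ascends the
# gradient of a loss on that mean w.r.t. the test inputs. kern.K is assumed
# to follow the GPflow-style kernel API; everything else is illustrative,
# and the returned tensor still needs a session run to materialize.
def fgsm_gp(K_inv_Y, kern, X_train, Xt, Yt, epsilon):
    x_adv = tf.constant(Xt, dtype=tf.float64)
    Kxt = kern.K(x_adv, tf.constant(X_train, dtype=tf.float64))
    mean = tf.matmul(Kxt, tf.constant(K_inv_Y, dtype=tf.float64))
    # Untargeted attack: increase the squared error against the true labels.
    loss = tf.reduce_sum((mean - tf.constant(Yt, dtype=tf.float64)) ** 2)
    grad, = tf.gradients(loss, x_adv)
    return x_adv + epsilon * tf.sign(grad)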
def createDatasetForAE(model, train_loader, test_loader, criterion, eps=0.25):
    adv_train_data_list = []
    clean_train_data_list = []
    train_label_list = []
    for i, (data, label) in enumerate(train_loader):
        clean_train_data_list.append(data)
        size = data.size()
        adv_train_data_list.append(
            fgsm(model, data.view(-1, size[-1] * size[-2]), label,
                 epsilon=eps, loss_fn=criterion).view(size))
        # Labels for the autoencoder are the clean images themselves.
        train_label_list.append(data)

    clean_test_data_list = []
    adv_test_data_list = []
    test_label_list = []
    for i, (data, label) in enumerate(test_loader):
        clean_test_data_list.append(data)
        size = data.size()
        adv_test_data_list.append(
            fgsm(model, data.view(-1, size[-1] * size[-2]), label,
                 epsilon=eps, loss_fn=criterion).view(size))
        # Labels for the autoencoder are the clean images themselves.
        test_label_list.append(data)

    clean_train_data_tensor = torch.cat(clean_train_data_list, 0)
    adv_train_data_tensor = torch.cat(adv_train_data_list, 0)
    train_label_tensor = torch.cat(train_label_list, 0)
    clean_test_data_tensor = torch.cat(clean_test_data_list, 0)
    adv_test_data_tensor = torch.cat(adv_test_data_list, 0)
    test_label_tensor = torch.cat(test_label_list, 0)

    # 120,000 training images (60,000 clean + 60,000 adversarial).
    total_train_data = torch.cat([clean_train_data_tensor, adv_train_data_tensor], 0)
    # 120,000 clean images serve as labels.
    total_train_label = torch.cat([train_label_tensor, train_label_tensor], 0)
    total_test_data = torch.cat([clean_test_data_tensor, adv_test_data_tensor], 0)
    total_test_label = torch.cat([test_label_tensor, test_label_tensor], 0)

    complete_data = {
        'total_train_data': total_train_data,
        'total_train_label': total_train_label,
        'total_test_data': total_test_data,
        'total_test_label': total_test_label,
    }
    torch.save(complete_data, './data/data_for_autoencoder.pth')
    print('data saved')
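# Hedged sketch of the fgsm helper assumed above (model, flattened inputs,
# labels, epsilon, loss_fn -> perturbed inputs); the real implementation is
# defined elsewhere in the repo, so this body is an assumption.
def fgsm(model, X, y, epsilon=0.25, loss_fn=None):
    X_adv = X.clone().detach().requires_grad_(True)
    loss = loss_fn(model(X_adv), y)
    loss.backward()
    # One signed-gradient ascent step, clipped to the valid pixel range.
    return (X_adv + epsilon * X_adv.grad.sign()).clamp(0, 1).detach()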
if __name__ == "__main__":
    # Initialize weights and bias.
    W, b = np.random.randn(NUM_CLASSES, INPUT_DIM), np.random.randn(NUM_CLASSES)

    # Load data.
    train_X, train_Y, test_X, test_Y = load_data()

    # Training.
    for it in range(1, NUM_ITERATIONS + 1):
        # Generate a training batch.
        X_original, Y_original = get_batch(train_X, train_Y)

        # Generate adversarial examples with FGSM and T-FGSM.
        grad = gradients(W, b, X_original, Y_original)
        X = np.concatenate((X_original, fgsm(X_original, grad["dX"], EPSILON)), axis=0)
        Y = np.concatenate((Y_original, Y_original), axis=0)

        Y_false = generate_false_labels(Y_original)
        Y_false = np.eye(NUM_CLASSES)[Y_false]
        grad = gradients(W, b, X_original, Y_false)
        X = np.concatenate((X, targeted_fgsm(X_original, grad["dX"], EPSILON)))
        Y = np.concatenate((Y, Y_original), axis=0)

        # Subsample a batch from the clean and adversarial examples.
        indices = np.random.randint(0, X.shape[0], BATCH_SIZE)
        X = X[indices, :]
        Y = Y[indices, :]

        # Compute gradients, update weights and bias.
        grad = gradients(W, b, X, Y)
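# Hedged sketch of generate_false_labels, which is assumed to draw a random
# incorrect class for every sample (the target of the T-FGSM step above);
# it accepts either one-hot or integer labels to match both call sites.
def generate_false_labels(Y):
    labels = np.argmax(Y, axis=1) if Y.ndim > 1 else Y
    offsets = np.random.randint(1, NUM_CLASSES, size=labels.shape[0])
    return (labels + offsets) % NUM_CLASSES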
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """
    # Set the TF random seed to improve reproducibility.
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimension ordering should follow the TensorFlow convention.
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'th', "
              "temporarily setting to 'tf'")

    # Create a TF session and set it as the Keras backend session.
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST data.
    X_train, Y_train, X_test, Y_test = load_mnist()

    # Define input TF placeholders.
    x = tf.placeholder(tf.float32, shape=(None, 784))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define the TF model graph by loading the trained model.
    path = 'save/'
    # Configuration of the trained model.
    alpha = 1.0; K_mc = 10; n_epoch = 500; nb_layers = 3
    nb_units = 1000; p = 0.5; wd = 1e-6; nb_classes = 10
    model_arch = 'mlp'; dropout = 'MC'
    # Test mode for adversarial examples.
    n_mc = 10  # number of MC samples used for the adversarial test
    model = load_model(path, alpha, K_mc, n_epoch, nb_layers,
                       nb_units, p, wd, nb_classes, model_arch,
                       dropout, n_mc)

    # Construct the prediction tensor.
    if dropout == 'MC':
        predictions = MC_dropout(model, x, n_mc=n_mc)
        string = ' (with MC, %d samples)' % n_mc
    else:
        predictions = model(x)
        string = ' (w/out MC)'

    # First check model accuracy on clean test data.
    accuracy, entropy, _ = model_eval(sess, x, y, predictions, X_test, Y_test)
    print('Test accuracy on test data: ' + str(accuracy) + string)
    print('Test entropy on test data: ' + str(entropy) + string)

    # Craft adversarial examples with the Fast Gradient Sign Method (FGSM).
    stepsize = tf.placeholder(tf.float32, shape=())
    adv_x = fgsm(x, predictions, eps=stepsize, clip_min=0.0, clip_max=1.0)

    accuracy_list = []
    entropy_mean_list = []
    entropy_ste_list = []
    stepsize_list = np.arange(0.0, 0.501, 0.02)
    vis_images = []
    for val in stepsize_list:
        X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test],
                                 stepsize_ph=stepsize, stepsize_val=val)
        # Evaluate the accuracy of the MNIST model on adversarial examples.
        accuracy, entropy_mean, entropy_ste = \
            model_eval(sess, x, y, predictions, X_test_adv, Y_test)
        accuracy_list.append(accuracy)
        entropy_mean_list.append(entropy_mean)
        entropy_ste_list.append(entropy_ste)
        vis_images.append(X_test_adv[0])
        print('Test accuracy on adversarial data: ' + str(accuracy) + string)
        print('Test entropy on adversarial data: ' + str(entropy_mean) + string)

    accuracy_list = np.array(accuracy_list)
    entropy_mean_list = np.array(entropy_mean_list)

    # Plot accuracy, entropy, and sample adversarial images.
    f, ax = plt.subplots(1, 3, figsize=(15, 4))
    ax[0].plot(stepsize_list, accuracy_list, 'b-')
    ax[1].plot(stepsize_list, entropy_mean_list, 'r-')
    ax[1].fill_between(stepsize_list, entropy_mean_list - entropy_ste_list,
                       entropy_mean_list + entropy_ste_list, color='r', alpha=0.3)
    plot_images(ax[2], np.array(vis_images), shape=(28, 28))
    plt.savefig('untargeted_attack.png', format='png')

    # Save results.
    filename = model_arch + '_nb_layers_' + str(nb_layers) \
        + '_nb_units_' + str(nb_units) + '_p_' + str(p) \
        + '_K_mc_' + str(K_mc) + '_alpha_' + str(alpha)
    if dropout == 'MC':
        filename = filename + '_n_mc_' + str(n_mc)
    elif dropout == 'pW':
        filename = filename + '_pW'
    else:
        filename = filename + '_no_drop'
    savepath = 'adv_test_results/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    with open(savepath + filename, 'wb') as f:
        pickle.dump([stepsize_list, accuracy_list, entropy_mean_list,
                     entropy_ste_list, vis_images], f)
    print('evaluation results saved in ' + savepath + filename)
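# Hedged sketch of the MC_dropout prediction tensor assumed above: average
# n_mc stochastic forward passes, assuming dropout is kept active at test
# time so each pass draws a fresh mask. The real helper may differ.
def MC_dropout(model, x, n_mc=10):
    samples = [model(x) for _ in range(n_mc)]
    return tf.add_n(samples) / float(n_mc)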