def make_fgsm(sess, env, X_data, y_data, epochs=1, eps=0.01, batch_size=128):
    """
    Generate FGSM by running env.x_fgsm.
    """
    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    X_adv = np.empty_like(X_data)

    with tf.variable_scope('model', reuse=True):
        env.x_fgsm = fgsm(simple_mlp,
                          env.x,
                          epochs=env.fgsm_epochs,
                          eps=env.fgsm_eps)

    for batch in range(n_batch):
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        adv = sess.run(env.x_fgsm,
                       feed_dict={
                           env.x: X_data[start:end],
                           env.fgsm_eps: eps,
                           env.fgsm_epochs: epochs
                       })
        X_adv[start:end] = adv
    return X_adv, y_data
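
A hedged usage sketch (assumes `env` exposes the `x`, `fgsm_eps`, and `fgsm_epochs` placeholders used above, `sess` holds a trained model, and `X_test`/`y_test` are hypothetical evaluation arrays):

X_adv, y_adv = make_fgsm(sess, env, X_test, y_test, epochs=1, eps=0.02)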
Example No. 2
def defense_wrapper(model,
                    criterion,
                    X,
                    defense,
                    epsilon=None,
                    step_size=None,
                    num_iter=None):

    model.aux = True
    if defense == 'fgsm':
        inv_delta = fgsm(model,
                         lambda model, X: -criterion(model, X),
                         X,
                         epsilon=epsilon)
    elif defense == 'pgd_linf':
        inv_delta = pgd_linf(model,
                             lambda model, X: -criterion(model, X),
                             X,
                             epsilon=epsilon,
                             step_size=step_size,
                             num_iter=num_iter)
    elif defense == 'inject_noise':
        inv_delta = inject_noise(X, epsilon)
    else:
        raise TypeError("Unrecognized defense name: {}".format(defense))
    model.aux = False
    # model.eval()
    return inv_delta
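
A hedged usage sketch (PyTorch and `torch` are assumed to be imported; `criterion` is assumed to map `(model, X)` to a scalar loss, so the negation above makes the attack routines step *down* the loss, and the `[0, 1]` clamp is an assumption for image inputs):

inv_delta = defense_wrapper(model, criterion, X, 'pgd_linf',
                            epsilon=8 / 255, step_size=2 / 255, num_iter=10)
X_defended = torch.clamp(X + inv_delta, 0.0, 1.0)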
Example No. 3
def main():
    # Setup model, dataset
    model = MLP(args.unit, 10)
    chainer.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()
    xp = chainer.cuda.get_array_module(model)
    chainer.serializers.load_npz(args.model, model)
    _, test_mnist = chainer.datasets.get_mnist()

    # Fast Gradient Sign Method (simple)
    images = sample(test_mnist, N_gen)
    adv_images, adv_filter = fgsm(model, images, eps=0.2)
    prob = F.softmax(model(adv_images), axis=1).data
    visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size,
              'fgsm.png')
    visualize(cupy.asnumpy(adv_filter), cupy.asnumpy(prob), img_size,
              'fgsm_filter.png')
Example No. 4
    images, _ = dataset[np.random.choice(len(dataset), n_samples)]
    images = chainer.cuda.to_gpu(images, args.gpu)
    return images


# Setup model, dataset
model = MLP(args.unit, 10)
chainer.cuda.get_device_from_id(args.gpu).use()
model.to_gpu()
xp = chainer.cuda.get_array_module(model)
chainer.serializers.load_npz(args.model, model)
_, test_mnist = chainer.datasets.get_mnist()

# Fast Gradient Sign Method (simple)
images = sample(test_mnist, N_gen)
adv_images = fgsm(model, images, eps=0.2)
prob = F.softmax(model(adv_images), axis=1).data
visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size, 'fgsm.png')

# Fast Gradient Sign Method (iterative)
images = sample(test_mnist, N_gen)
adv_images = fgsm(model, images, eps=0.01, iterations=20)
prob = F.softmax(model(adv_images), axis=1).data
visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size,
          'fgsm_iterative.png')

# Target class Gradient Sign Method (least-likely)
images = sample(test_mnist, N_gen)
adv_images = tgsm(model, images, eps=0.15)
prob = F.softmax(model(adv_images), axis=1).data
visualize(cupy.asnumpy(adv_images), cupy.asnumpy(prob), img_size, 'tgsm.png')
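
For reference, a hedged sketch of the update rules the Chainer helpers above presumably implement (the repo's actual `fgsm`/`tgsm` may clip, normalize, or pick targets differently):

# FGSM (simple):     x_adv = clip(x + eps * sign(d loss(f(x), y_true) / dx), 0, 1)
# FGSM (iterative):  repeat the step above `iterations` times with a small eps,
#                    recomputing the gradient each time
# TGSM (targeted):   x_adv = clip(x - eps * sign(d loss(f(x), y_target) / dx), 0, 1),
#                    stepping *toward* y_target (here the least-likely class)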
Example No. 5
    # Report the classifier's accuracy under each type of attack so the results
    # can be compared afterwards.

    # First compute gradients
    grad = gradients(W, b, X, Y)

    Y = np.argmax(Y, axis=1)

    # 0. original example (not an attack!)
    Y_hat_original = np.argmax(forward(W, b, X), axis=1)
    score = evaluate(Y, Y_hat_original)
    print("[original]\tAccuracy {}%".format(score))
    print(Y_hat_original)

    # 1. fast-gradient sign method (FGSM)
    X_fgsm = fgsm(X, grad["dX"], 2 * EPSILON)
    Y_hat_fgsm = np.argmax(forward(W, b, X_fgsm), axis=1)
    score = evaluate(Y, Y_hat_fgsm)
    print("[  FGSM]\tAccuracy {}%".format(score))
    print(Y_hat_fgsm)

    # 2. targeted fast-gradient sign method (T-FGSM)
    # Recompute the gradient w.r.t. the false (target) labels before perturbing.
    Y_false = generate_false_labels(Y)
    grad_false = gradients(W, b, X, np.eye(NUM_CLASSES)[Y_false])
    X_tfgsm = targeted_fgsm(X, grad_false["dX"], 2 * EPSILON)
    Y_hat_tfgsm = np.argmax(forward(W, b, X_tfgsm), axis=1)
    score = evaluate(Y, Y_hat_tfgsm)
    print("[T-FGSM]\tAccuracy {}%".format(score))
    print(Y_hat_tfgsm)

    # 3. iterative fast-gradient sign method (I-FGSM)
    X_ifgsm = iterative_fgsm(X, grad["dX"], 10, 2 * EPSILON)[-1]
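
A minimal NumPy sketch of what `fgsm` and `targeted_fgsm` with this signature might look like (the real helpers in this repo may clip to a different range or rescale the gradient):

import numpy as np

def fgsm_numpy_sketch(X, dX, eps):
    # Untargeted: step up the gradient of the loss w.r.t. the true labels.
    return np.clip(X + eps * np.sign(dX), 0.0, 1.0)

def targeted_fgsm_numpy_sketch(X, dX, eps):
    # Targeted: step down the gradient of the loss w.r.t. the target labels.
    return np.clip(X - eps * np.sign(dX), 0.0, 1.0)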
Example No. 6
                    adversarial_examples = []
                    aya = 0
                    xmals, counts = np.unique(x_malware, axis=0, return_counts=True)
                    for i in range(xmals.shape[0]):
                        count = counts[i]
                        xmal = xmals[i]

                        if target_model.model.predict(xmal.reshape(1, -1)) == 0:
                            # Already misclassified as benign: count it as a false negative.
                            false_negative = false_negative + count
                        else:
                            xmal = torch.from_numpy(xmal).float().cuda()
                            # Craft an adversarial sample against the surrogate model.
                            result = attacks.fgsm(feature,
                                                  xmal.unsqueeze(0),
                                                  target_model.model.predict(
                                                      xmal.unsqueeze(0).cpu().detach().numpy()),
                                                  sarogate_model,
                                                  nn.BCELoss(),
                                                  eps=1)

                            distrotion = torch.sum(result - xmal)
                            if target_model.model.predict(result.cpu().detach().numpy()) == 1:
                                # Still detected as malware: the attack failed.
                                failiure = failiure + count
                            else:
                                # Evasion succeeded; record the distortion once per duplicate sample.
                                sucsses = sucsses + count
                                for _ in range(count):
                                    distrotions.append(distrotion)
Example No. 7
def get_adversarial(sess_trained, x, probs, image):
    x_adv = fgsm(x=x, predictions=probs, eps=0.3, clip_max=1.0, clip_min=-1.0)
    img_adv = sess_trained.run(x_adv, feed_dict={x: image})
    return img_adv
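
A hedged usage sketch (assumes `x` and `probs` come from an already-built cleverhans-style graph, `sess_trained` holds the trained weights, and `X_test[:1]` is a hypothetical single-image batch scaled to `[-1, 1]`):

adv_image = get_adversarial(sess_trained, x, probs, X_test[:1])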
Example No. 8
def main(_):
    FLAGS = flags.FLAGS

    if FLAGS.param_constructor == 'EmpiricalPrior':
        param_constructor = EmpiricalPrior
    elif FLAGS.param_constructor == 'SimpleMNISTParams':
        param_constructor = SimpleMNISTParams
    elif FLAGS.param_constructor == 'PaperParams':
        param_constructor = PaperParams
    else:
        print('Unsupported parameter struct specified')
        return
    attack_name = FLAGS.adv_attack_name.format(FLAGS.epsilon, FLAGS.norm_type)
    #Directory where the adv kernels and adv-specific graphs will go
    adv_output_dir = os.path.join(FLAGS.adv_output, attack_name)
    #Directory where the adv dataset is/will be
    adv_data_dir = os.path.join(FLAGS.adv_data, attack_name)
    #Filename the attack will be saved as if it is being generated, or loaded from if it already exists
    adv_data_file = FLAGS.adv_data_file.format(attack_name)

    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    kernels_dir = os.path.join(FLAGS.output_dir, "kernels")
    if not os.path.exists(kernels_dir):
        os.makedirs(kernels_dir)
        print("Directory ", kernels_dir, " Created ")

    #Load all the data (train, test, val)
    X, Y, Xv, Yv, Xt, Yt = dataset.mnist_sevens_vs_twos(FLAGS.data_path,
                                                        noisy=True)

    #Parameters for the GP
    params = param_constructor(FLAGS.seed)
    params = verify_params(params)

    pu.dump(params, path.join(FLAGS.output_dir, 'params.pkl.gz'))
    #Create the GP
    with tf.device("GPU:0"):
        kern = ck.create_kern(params)

    #Calculate the training kernel and its inverse, if it doesn't exist.
    #If it already exists, just load it.
    #We do classification by treating it as a regression problem i.e. the conjugate method
    #So all we need is the inverse of the training kernel
    Kxx = initialize_kernel("Kxx", X, None, False, kern, kernels_dir)
    K_inv = initialize_Kxx_inverse(kernels_dir)
    #Labels are not centered: one-hot vectors are used directly as probabilities
    #(the old centering step, Y[Y == 0.] = -1, is intentionally left disabled)
    K_inv_Y = K_inv @ Y

    if not FLAGS.adv_only:
        classify('test', Xt, Yt, 't', X, K_inv, K_inv_Y, kern, kernels_dir,
                 FLAGS.output_dir)
        classify('validation', Xv, Yv, 'v', X, K_inv, K_inv_Y, kern,
                 kernels_dir, FLAGS.output_dir)

    adv_kernels_dir = os.path.join(adv_output_dir, "kernels")
    if not os.path.exists(adv_kernels_dir):
        os.makedirs(adv_kernels_dir)
        print("Directory ", adv_kernels_dir, " Created ")

    #At this point we no longer need any of the kernels, just the inverse of the training kernel.
    #Generate attack and save adversarial examples
    if FLAGS.generate_attack:
        print('Generating attack')
        #Yt_adv = np.copy(Yt)
        #Yt_adv[Yt_adv == 0.] = -1
        remove_kernels('a', adv_kernels_dir)

        if FLAGS.attack == 'fgsm':
            Xa = attacks.fgsm(K_inv_Y,
                              kern,
                              X,
                              Xt,
                              Yt,
                              seed=FLAGS.seed,
                              epsilon=FLAGS.epsilon,
                              norm_type=FLAGS.norm_type,
                              output_images=True,
                              max_output=128,
                              output_path=adv_data_dir,
                              adv_file_output=adv_data_file)
        else:
            Xa = attacks.fgsm_cleverhans(K_inv_Y,
                                         kern,
                                         X,
                                         Xt,
                                         Yt,
                                         epsilon=FLAGS.epsilon,
                                         norm_type=FLAGS.norm_type,
                                         output_images=True,
                                         max_output=128,
                                         output_path=adv_data_dir,
                                         adv_file_output=adv_data_file)
    else:
        print('Loading attack')
        Xa = np.load(path.join(adv_data_dir, adv_data_file))
        #Xa = Xa.reshape(-1, 28*28)

    #Calculate adversarial kernels and error
    classify('adv', Xa, Yt, 'a', X, K_inv, K_inv_Y, kern, adv_kernels_dir,
             adv_output_dir)
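
A hedged sketch of the conjugate (regression-style) GP classification the comments above describe; the actual `classify` helper may batch the kernel computation or report errors differently, and `K_cross` here stands for a hypothetical precomputed K(X_new, X_train) matrix:

import numpy as np

def gp_conjugate_predict_sketch(K_cross, K_inv_Y):
    # Posterior mean = K(X_new, X_train) @ K(X_train, X_train)^-1 @ Y,
    # then pick the class with the largest mean response.
    mean = K_cross @ K_inv_Y
    return np.argmax(mean, axis=1)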
Example No. 9
def createDatasetForAE(model, train_loader, test_loader, criterion, eps=0.25):
    adv_train_data_list = []
    clean_train_data_list = []
    train_label_list = []

    for i, (data, label) in enumerate(train_loader):
        clean_train_data_list.append(data)
        size = data.size()
        adv_train_data_list.append(
            fgsm(model,
                 data.view(-1, size[-1] * size[-2]),
                 label,
                 epsilon=eps,
                 loss_fn=criterion).view(size))
        train_label_list.append(
            data)  # Labels for the autoencoder are the clean images
        # train_label_list.append(label)

    clean_test_data_list = []
    adv_test_data_list = []
    test_label_list = []

    for i, (data, label) in enumerate(test_loader):
        clean_test_data_list.append(data)
        size = data.size()
        adv_test_data_list.append(
            fgsm(model,
                 data.view(-1, size[-1] * size[-2]),
                 label,
                 epsilon=eps,
                 loss_fn=criterion).view(size))
        test_label_list.append(data)  # Labels for the autoencoder are the clean images
        # test_label_list.append(label)

    clean_train_data_tensor = torch.cat(clean_train_data_list, 0)
    adv_train_data_tensor = torch.cat(adv_train_data_list, 0)
    train_label_tensor = torch.cat(train_label_list, 0)

    clean_test_data_tensor = torch.cat(clean_test_data_list, 0)
    adv_test_data_tensor = torch.cat(adv_test_data_list, 0)
    test_label_tensor = torch.cat(test_label_list, 0)

    total_train_data = torch.cat(
        [clean_train_data_tensor, adv_train_data_tensor],
        0)  # 120,000 images (60,000 clean + 60,000 adversarial)
    total_train_label = torch.cat([train_label_tensor, train_label_tensor],
                                  0)  # the 120,000 clean images serve as labels

    total_test_data = torch.cat([clean_test_data_tensor, adv_test_data_tensor],
                                0)
    total_test_label = torch.cat([test_label_tensor, test_label_tensor], 0)

    complete_data = {
        'total_train_data': total_train_data,
        'total_train_label': total_train_label,
        'total_test_data': total_test_data,
        'total_test_label': total_test_label
    }

    torch.save(complete_data, './data/data_for_autoencoder.pth')
    print('data saved')
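
A hedged sketch of an FGSM helper with the signature used above (`fgsm(model, x, y, epsilon, loss_fn)`); the repo's own implementation may clamp to a different range or move tensors to the GPU:

import torch

def fgsm_torch_sketch(model, x, y, epsilon, loss_fn):
    x = x.clone().detach().requires_grad_(True)
    loss = loss_fn(model(x), y)   # e.g. nn.CrossEntropyLoss()
    loss.backward()
    # Step in the sign of the input gradient and keep pixels in [0, 1].
    return (x + epsilon * x.grad.sign()).clamp(0.0, 1.0).detach()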
Example No. 10
if __name__ == "__main__":
    # Initialize weights and bias
    W, b = np.random.randn(NUM_CLASSES,
                           INPUT_DIM), np.random.randn(NUM_CLASSES)

    # Load data
    train_X, train_Y, test_X, test_Y = load_data()

    # Training
    for it in range(1, NUM_ITERATIONS + 1):
        # Generate training batch
        X_original, Y_original = get_batch(train_X, train_Y)

        # Generate adversarial examples by FGSM and T-FGSM
        grad = gradients(W, b, X_original, Y_original)
        X = np.concatenate((X_original, fgsm(X_original, grad["dX"], EPSILON)),
                           axis=0)
        Y = np.concatenate((Y_original, Y_original), axis=0)

        Y_false = generate_false_labels(Y_original)
        Y_false = np.eye(NUM_CLASSES)[Y_false]
        grad = gradients(W, b, X_original, Y_false)
        X = np.concatenate((X, targeted_fgsm(X_original, grad["dX"], EPSILON)))
        Y = np.concatenate((Y, Y_original), axis=0)

        indices = np.random.randint(0, X.shape[0], BATCH_SIZE)
        X = X[indices, :]
        Y = Y[indices, :]

        # Compute gradients, update weights and bias
        grad = gradients(W, b, X, Y)
Example No. 11
def main(argv=None):
    """
    MNIST cleverhans tutorial: craft FGSM adversarial examples against an
    MC-dropout MLP and evaluate accuracy and predictive entropy.
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimension ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = load_mnist()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 784))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph by loading model
    path = 'save/'
    # the following are the config of the trained model
    alpha = 1.0
    K_mc = 10
    n_epoch = 500
    nb_layers = 3
    nb_units = 1000
    p = 0.5
    wd = 1e-6
    nb_classes = 10
    model_arch = 'mlp'
    dropout = 'MC'  # test mode for adversarial examples
    n_mc = 10  # number of MC samples used for adversarial test
    model = load_model(path, alpha, K_mc, n_epoch, nb_layers, \
                       nb_units, p, wd, nb_classes, model_arch, \
                       dropout, n_mc)

    # construct prediction tensor
    if dropout == 'MC':
        predictions = MC_dropout(model, x, n_mc = n_mc)
        string = ' (with MC, %d samples)' % n_mc
    else:
        predictions = model(x)
        string = ' (w/out MC)'

    # first check model accuracy on test data
    accuracy, entropy, _ = model_eval(sess, x, y, predictions, X_test, Y_test)
    print('Test accuracy on test data: ' + str(accuracy) + string)
    print('Test entropy on test data: ' + str(entropy) + string)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    stepsize = tf.placeholder(tf.float32, shape=())
    adv_x = fgsm(x, predictions, eps=stepsize, clip_min = 0.0, clip_max = 1.0)

    accuracy_list = []
    entropy_mean_list = []
    entropy_ste_list = []
    stepsize_list = np.arange(0.0, 0.501, 0.02)
    vis_images = []
    for val in stepsize_list:
        X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], \
                             stepsize_ph = stepsize, stepsize_val = val)
        # Evaluate the accuracy of the MNIST model on adversarial examples
        accuracy, entropy_mean, entropy_ste = \
            model_eval(sess, x, y, predictions, X_test_adv, Y_test)
        accuracy_list.append(accuracy)
        entropy_mean_list.append(entropy_mean)
        entropy_ste_list.append(entropy_ste)
        vis_images.append(X_test_adv[0])

    print('Test accuracy on adversarial data: ' + str(accuracy) + string)
    print('Test entropy on adversarial data: ' + str(entropy_mean) + string)

    accuracy_list = np.array(accuracy_list)
    entropy_mean_list = np.array(entropy_mean_list)
    f, ax = plt.subplots(1, 3, figsize=(15, 4))
    ax[0].plot(stepsize_list, accuracy_list, 'b-')
    ax[1].plot(stepsize_list, entropy_mean_list, 'r-')
    ax[1].fill_between(stepsize_list, entropy_mean_list - entropy_ste_list, \
        entropy_mean_list + entropy_ste_list, color='r', alpha=0.3)
    plot_images(ax[2], np.array(vis_images), shape = (28, 28))
    plt.savefig('untargeted_attack.png', format='png')

    # save result
    filename = model_arch + '_nb_layers_' + str(nb_layers) \
             + '_nb_units_' + str(nb_units) + '_p_' + str(p) + \
             '_K_mc_' + str(K_mc) + '_alpha_' + str(alpha)
    if dropout == 'MC':
        filename = filename + '_n_mc_' + str(n_mc)
    elif dropout == 'pW':
        filename = filename + '_pW'
    else:
        filename = filename + '_no_drop'
    savepath = 'adv_test_results/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    with open(savepath + filename, 'wb') as result_file:
        pickle.dump([stepsize_list, accuracy_list, entropy_mean_list,
                     entropy_ste_list, vis_images], result_file)
    print('evaluation results saved in ' + savepath + filename)
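
A hedged sketch of what `MC_dropout(model, x, n_mc)` above presumably builds: the average prediction over n_mc stochastic forward passes with dropout left active, so the predictive entropy reflects model uncertainty (how the dropout masks are resampled in the graph depends on the actual helper):

def mc_dropout_sketch(model, x, n_mc=10):
    outputs = [model(x) for _ in range(n_mc)]  # each call is one stochastic pass
    return sum(outputs) / float(n_mc)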