Example #1
def main(net_type):
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print "INFO: temporarily set 'image_dim_ordering' to 'th'"

    sess = get_session()
    keras.backend.set_session(sess)

    (train_xs, train_ys), (test_xs, test_ys) = data_cifar10.load_cifar10()
    print 'Loaded cifar10 data'

    x = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model, model_name = resnet_cifar10.resnet_cifar10(repetations=3, net_type=net_type)
    if net_type == 'squared_resnet':
        model = adam_pretrain(model, model_name, train_xs, train_ys, 1, test_xs, test_ys)

    predictions = model(x)
    tf_model_train(sess, x, y, predictions, train_xs, train_ys, test_xs, test_ys,
                   data_augmentor=data_cifar10.augment_batch)

    save_model(model, model_name)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    test_xs_adv, = batch_eval(sess, [x], [adv_x], [test_xs])
    assert test_xs_adv.shape[0] == 10000, test_xs_adv.shape

    # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, test_xs_adv, test_ys)
    print 'Test accuracy on adversarial examples: ' + str(accuracy)

    print "Repeating the process, using adversarial training"
    # Redefine TF model graph
    model_2, _ = resnet_cifar10.resnet_cifar10(repetations=3, net_type=net_type)
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, train_xs, train_ys, test_xs, test_ys,
                   predictions_adv=predictions_2_adv,
                   data_augmentor=data_cifar10.augment_batch)

    save_model(model_2, model_name + '_adv')

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    test_xs_adv_2, = batch_eval(sess, [x], [adv_x_2], [test_xs])
    assert test_xs_adv_2.shape[0] == 10000, test_xs_adv_2.shape

    # Evaluate the accuracy of the adversarially trained model on adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, test_xs_adv_2, test_ys)
    print 'Test accuracy on adversarial examples: ' + str(accuracy_adv)
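
All of the snippets on this page funnel numpy data through cleverhans' batch_eval helper, which evaluates symbolic tensors in minibatches and concatenates the results. A minimal sketch of that call pattern, assuming a TF session sess, a placeholder x, a symbolic output sym built from it, and a numpy array data (the args dict follows the newer calls further down; the older calls in Example #1 omit it):

from cleverhans.utils_tf import batch_eval

# Feed `data` through the graph in minibatches of 128 and concatenate the
# evaluated values of `sym`. batch_eval returns one array per requested output
# tensor, hence the trailing comma to unpack the single entry.
out, = batch_eval(sess, [x], [sym], [data], args={'batch_size': 128})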
Example #2
def basic_iterative_method(sess, model, X, Y, eps, eps_iter, nb_iter=50,
                           clip_min=None, clip_max=None, batch_size=256):
    """
    TODO
    :param sess:
    :param model: predictions or after-softmax
    :param X:
    :param Y:
    :param eps:
    :param eps_iter:
    :param nb_iter:
    :param clip_min:
    :param clip_max:
    :param batch_size:
    :return:
    """
    sess.run(tf.initialize_all_variables())
    # Define TF placeholders for the input and output
    x = tf.placeholder(tf.float32, shape=(None,)+X.shape[1:])
    y = tf.placeholder(tf.float32, shape=(None,)+Y.shape[1:])
    # results will hold the adversarial inputs at each iteration of BIM;
    # thus it will have shape (nb_iter, n_samples, n_rows, n_cols, n_channels)
    results = np.zeros((nb_iter, X.shape[0],) + X.shape[1:])
    # Initialize adversarial samples as the original samples, set upper and
    # lower bounds
    X_adv = X
    X_min = X_adv - eps
    X_max = X_adv + eps
    print('Running BIM iterations...')
    # "its" is a dictionary that keeps track of the iteration at which each
    # sample becomes misclassified. The default value will be (nb_iter-1), the
    # very last iteration.
    def f(val):
        return lambda: val
    its = defaultdict(f(nb_iter-1))
    # Out keeps track of which samples have already been misclassified
    out = set()
    for i in tqdm(range(nb_iter)):
        adv_x = fgsm(
            x, model(x), eps=eps_iter,
            clip_min=clip_min, clip_max=clip_max, y=y
        )
        X_adv, = batch_eval(
            sess, [x, y], [adv_x],
            [X_adv, Y], feed={K.learning_phase(): 0},
            args={'batch_size': batch_size}
        )
        X_adv = np.maximum(np.minimum(X_adv, X_max), X_min)
        results[i] = X_adv
        # check misclassifieds
        #predictions = model.predict_classes(X_adv, batch_size=512, verbose=0)
        predict = model.predict(X_adv)
        predict = np.argmax(predict, axis=1)
        misclassifieds = np.where(predict != Y.argmax(axis=1))[0]
        for elt in misclassifieds:
            if elt not in out:
                its[elt] = i
                out.add(elt)

    return its, results
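
A hedged usage sketch for the function above, assuming a TF session sess, a compiled Keras classifier model, and one-hot labelled arrays X_test, Y_test scaled to [0, 1]:

import numpy as np

its, results = basic_iterative_method(
    sess, model, X_test, Y_test,
    eps=0.3, eps_iter=0.01, nb_iter=50,
    clip_min=0., clip_max=1., batch_size=256)

# results[i] holds the adversarial inputs after iteration i; its[j] is the
# first iteration at which sample j was misclassified (nb_iter - 1 if never).
first_success = np.array([its[j] for j in range(len(X_test))])
print('Median iterations to misclassification:', np.median(first_success))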
Example #3
def fast_gradient_sign_method(sess,
                              model,
                              X,
                              Y,
                              eps,
                              clip_min=None,
                              clip_max=None,
                              batch_size=256):
    """
    TODO
    :param sess:
    :param model: predictions or after-softmax
    :param X:
    :param Y:
    :param eps:
    :param clip_min:
    :param clip_max:
    :param batch_size:
    :return:
    """
    # Define TF placeholders for the input and output
    x = tf.placeholder(tf.float32, shape=(None, ) + X.shape[1:])
    y = tf.placeholder(tf.float32, shape=(None, ) + Y.shape[1:])
    adv_x = fgsm(x,
                 model(x),
                 eps=eps,
                 clip_min=clip_min,
                 clip_max=clip_max,
                 y=y)
    X_adv, = batch_eval(sess, [x, y], [adv_x], [X, Y],
                        feed={K.learning_phase(): 0},
                        args={'batch_size': batch_size})

    return X_adv
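
A hedged usage sketch for fast_gradient_sign_method, under the same assumptions as above (a TF session sess, a Keras classifier model, and one-hot labelled arrays X_test, Y_test scaled to [0, 1]):

import numpy as np

X_test_adv = fast_gradient_sign_method(
    sess, model, X_test, Y_test,
    eps=0.3, clip_min=0., clip_max=1., batch_size=256)

# Accuracy of the same model on the crafted examples.
preds_adv = np.argmax(model.predict(X_test_adv), axis=1)
print('Accuracy on FGSM examples:', np.mean(preds_adv == Y_test.argmax(axis=1)))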
def train_substitute(sess, x, y, bbox_preds, X_sub, Y_sub):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, FLAGS.nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in range(FLAGS.data_aug):
        print("Epoch #" + str(rho))
        train_params = {
            'nb_epochs': FLAGS.nb_epochs_s,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < FLAGS.data_aug - 1:
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          FLAGS.lmbda)

            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            # First feed forward a denoising autoencoder.
            if args.ae:
                print("Denoising...")
                num_data = X_sub_prev.shape[0]
                autoencoder.visualize(sess, X_sub_prev.reshape(num_data, -1), "sub{}".format(rho))
                filtered_data = autoencoder.run(sess, X_sub_prev.reshape(num_data, -1))
                X_sub_prev = filtered_data.reshape(num_data, 28, 28, 1)
            if args.alg == "cnn":
                eval_params = {'batch_size': FLAGS.batch_size}
                bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                      args=eval_params)[0]
                # Note here that we take the argmax because the adversary
                # only has access to the label (not the probabilities) output
                # by the black-box model
                Y_sub_prev = np.argmax(bbox_val, axis=1)
            elif is_not_nn():
                x_sub_prev = X_sub_prev.reshape(X_sub_prev.shape[0], -1)
                Y_sub_prev = bbox_preds.predict(x_sub_prev)
            Y_sub[int(len(X_sub)/2):] = Y_sub_prev

    return model_sub, preds_sub
Example #5
def get_activations(data):
    data_activations = {}
    for layer in layers:
        layer_sym = tf.layers.flatten(model.get_layer(x, layer))
        data_activations[layer] = batch_eval(sess, [x], [layer_sym], [data],
                                             args={'batch_size': FLAGS.batch_size})[0]
    return data_activations
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              rng):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    log_raw.info("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        log_raw.info("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params,
                    rng=rng)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            log_raw.info("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            log_raw.info("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub)/2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
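
The augmentation loop above doubles the substitute training set on every round except the last, so the final set size follows directly from the starting holdout. A small arithmetic sketch using the holdout = 150 and data_aug = 6 defaults that appear in the black-box driver examples further down (those defaults are assumptions taken from those examples, not values fixed by this function):

holdout, data_aug = 150, 6                  # defaults used by the driver examples below
final_size = holdout * 2 ** (data_aug - 1)  # one doubling per augmentation round
print(final_size)                           # 4800 substitute training points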
Example #7
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """
    keras.layers.core.K.set_learning_phase(0)

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session and set as Keras backend session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:FLAGS.holdout]
    Y_sub = np.argmax(Y_test[:FLAGS.holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[FLAGS.holdout:]
    Y_test = Y_test[FLAGS.holdout:]

    # Define input and output TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    bbox = prep_bbox(X_train, Y_train, X_test, Y_test)

    print("Training the substitute model.")
    # Train substitute using method from https://arxiv.org/abs/1602.02697
    model_sub, preds_sub = train_sub(sess, x, y, bbox, X_sub, Y_sub)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Craft adversarial examples using the substitute
    eval_params = {'batch_size': FLAGS.batch_size}
    x_adv_sub = fgsm.generate(x, **fgsm_par)

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    X_test_adv, = batch_eval(sess, [x], [x_adv_sub], [X_test],
                             args=eval_params)
    x_test_adv = X_test_adv.reshape(X_test_adv.shape[0], -1)
    y_test = np.argmax(Y_test, axis=1)
    xg_adv = xgb.DMatrix(x_test_adv, label=y_test)
    pred_adv = bbox.predict(xg_adv)
    accuracy = np.sum(pred_adv == y_test) / y_test.shape[0]
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
def evaluate(model, sess, x, X_test, y_test, X_test_adv, attackType,
             print_image_index=[], testClean=False, saveFlag=True):
    from cleverhans.utils_tf import batch_eval

    eval_par = {'batch_size': 32}

    # Optionally test on clean examples
    if testClean:
        print("Clean Examples:")
        model_preds_clean = batch_eval(sess, [x], [model(x)], [X_test], args=eval_par)[0]
        printResults(model_preds_clean, y_test)
        print("")
    else:
        print("(Skipping clean examples)\n")

    # Evaluate results on adversarial examples
    print("Adversarial Examples:")
    model_preds = batch_eval(sess, [x], [model(x)], [X_test_adv], args=eval_par)[0]
    printResults(model_preds, y_test)
    np.save( "data/pgd_preds_" + attackType + ".npy" , model_preds)

    # Calculate L2 norm of pertrubations
    l2_norm = np.sum((X_test_adv - X_test)**2, axis=(1, 2, 3))**.5
    l2_norm_sum = mean_ci(l2_norm)
    print('Avg. L2 norm of perturbations: ' + '{0:.6f} '.format(l2_norm_sum[0]) + \
     '({0:.6f}'.format(l2_norm_sum[1]) + ' - {0:.6f})'.format(l2_norm_sum[2]))

    # Identify the most perturbed images from health and sick patients
    indMaxDiff_healthy = np.argmax(l2_norm[y_test[:,1] == 0])
    indMaxDiff_sick = np.argmax(l2_norm[y_test[:,1] == 1])
    indMaxDiff_sick_shifted = indMaxDiff_sick + np.nonzero(y_test[:,1] == 1)[0][0]
    print("Most perturbed images are " + str(indMaxDiff_healthy) + " and " + str(indMaxDiff_sick_shifted))

    # Optionally save the most perturbed images and also any images whose indices are in print_image_index
    if saveFlag:
        for ind in print_image_index:
            scipy.misc.imsave('example_images/normal_img_' + str(ind) + '.png',
                              deprocess_inception(X_test[ind]))
            scipy.misc.imsave('example_images/attack_pgd_img' + str(ind) + attackType + '.png',
                              deprocess_inception(X_test_adv[ind]))

        scipy.misc.imsave('example_images/biggest_attack_' + attackType + '_img' + str(indMaxDiff_healthy) + '.png',
                          deprocess_inception(X_test_adv[indMaxDiff_healthy]))
        scipy.misc.imsave('example_images/biggest_attack_' + attackType + '_img' + str(indMaxDiff_sick_shifted) + '.png',
                          deprocess_inception(X_test_adv[indMaxDiff_sick_shifted]))

    return
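
The perturbation statistic above is the per-image Euclidean norm: squared pixel differences are summed over the spatial and channel axes, then square-rooted. A self-contained sketch with synthetic data (shapes chosen purely for illustration):

import numpy as np

rng = np.random.RandomState(0)
clean = rng.rand(4, 8, 8, 3)                 # four fake 8x8 RGB images
adv = clean + 0.01 * rng.randn(4, 8, 8, 3)   # small synthetic perturbation

# One L2 norm per image: sum squared differences over axes 1-3, then sqrt.
l2 = np.sum((adv - clean) ** 2, axis=(1, 2, 3)) ** 0.5
print(l2.shape)  # (4,)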
Example #9
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, FLAGS.nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(FLAGS.data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': FLAGS.nb_epochs_s,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess,
                    x,
                    y,
                    preds_sub,
                    X_sub,
                    to_categorical(Y_sub),
                    init_all=False,
                    verbose=False,
                    args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < FLAGS.data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          FLAGS.lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': FLAGS.batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
Example #10
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:FLAGS.holdout]
    Y_sub = np.argmax(Y_test[:FLAGS.holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[FLAGS.holdout:]
    Y_test = Y_test[FLAGS.holdout:]

    # Define input and output TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    bbox_preds = prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test)

    print("Training the substitute model.")
    # Train substitute using method from https://arxiv.org/abs/1602.02697
    substitute_preds = train_substitute(sess, x, y, bbox_preds, X_sub, Y_sub)

    # Craft adversarial examples using the substitute
    adv_x = fgsm(x, substitute_preds, eps=0.2)
    eval_params = {'batch_size': FLAGS.batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
    
    np.save("xorg_black.npy", X_test)
    np.save("xadv_black.npy",X_test_adv)
    np.save("ytest_black.npy",Y_test)
    np.save("xtrain.npy", X_train)
    np.save("ytrain.npy",Y_train)

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess, x, y, bbox_preds, X_test_adv, Y_test,
                          args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
Example #11
def train_substitute(bbox_preds, x_sub, y_sub, nb_classes,
                     nb_epochs_s, batch_size, lr, data_aug, lmbda,
                     aug_batch_size, rng, img_rows=28, img_cols=28,
                     nchannels=1):
    model_sub = ModelSubstitute('model_s', nb_classes)
    preds_sub = model_sub.get_logits(x)
    loss_sub = CrossEntropy(model_sub, smoothing=0)

    print("Defined TensorFlow model graph for the substitute.")

    grads = jacobian_graph(preds_sub, x, nb_classes)

    for i in xrange(data_aug):
        print("Substitute training epoch #" + str(i))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': lr
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess, loss_sub, x_sub, to_categorical(y_sub, nb_classes),
                  init_all=False, args=train_params, rng=rng,
                  var_list=model_sub.get_params())

        if i < data_aug - 1:
            print("Augmenting substitute training data.")
            lmbda_coef = 2 * int(int(i / 3) != 0) - 1
            x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, grads,
                                          lmbda_coef * lmbda, aug_batch_size)

            print("Labeling substitute training data.")
            y_sub = np.hstack([y_sub, y_sub])
            x_sub_prev = x_sub[int(len(x_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [x_sub_prev],
                                  args=eval_params)[0]

            y_sub[int(len(x_sub) / 2):] = np.argmax(bbox_val, axis=1)
    show_plot(x_sub, y_sub)
    return model_sub, preds_sub, x_sub, y_sub
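
As a quick check of the lmbda_coef expression above: it keeps lambda negative for the first three augmentation rounds and flips it positive afterwards, which the following one-liner confirms:

for i in range(6):
    print(i, 2 * int(int(i / 3) != 0) - 1)  # rounds 0-2 -> -1, rounds 3-5 -> +1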
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """
    keras.layers.core.K.set_learning_phase(0)

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session and set as Keras backend session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # sess = tf.Session()
    keras.backend.set_session(sess)

    if args.ae:
        autoencoder.restore(sess, args.ae) # Restore model weights from previously saved model

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:FLAGS.holdout]
    Y_sub = np.argmax(Y_test[:FLAGS.holdout], axis=1)

    # Shrink training data.
    # X_train = X_train[:10000]
    # Y_train = Y_train[:10000]
    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[FLAGS.holdout:]
    Y_test = Y_test[FLAGS.holdout:]

    # Define input and output TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    print("Preparing the black-box model.")
    if args.alg == "cnn":
        model, bbox = prep_cnn_bbox(sess, x, y, X_train, Y_train, X_test, Y_test)
    elif is_not_nn():
        bbox = prep_boost_bbox(X_train, Y_train, X_test, Y_test)

    print("Training the substitute model.")
    model_sub, preds_sub = train_substitute(sess, x, y, bbox, X_sub, Y_sub)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Craft adversarial examples using the substitute
    print("Crafting the adversarial examples.")
    eval_params = {'batch_size': FLAGS.batch_size}
    x_adv_sub = fgsm.generate(x, **fgsm_par)
    X_test_adv, = batch_eval(sess, [x], [x_adv_sub], [X_test], args=eval_params)

    # Dump adversarial examples.
    example_file = "example/{}.data".format(model_name)
    with open(example_file, "wb") as f:
        pickle.dump(X_test_adv, f)

    if args.ae:
        print("Denoising...")
        num_data = X_test_adv.shape[0]
        autoencoder.visualize(sess, X_test_adv.reshape(num_data, -1), "adv")
        filtered_data = autoencoder.run(sess, X_test_adv.reshape(num_data, -1))
        X_test_adv = filtered_data.reshape(num_data, 28, 28, 1)
    # Evaluate the accuracy of the "black-box" model on adversarial examples
    if args.alg == "cnn":
        accuracy = model_eval(sess, x, y, bbox, X_test_adv, Y_test,
                              args=eval_params)
    elif is_not_nn():
        x_test_adv = X_test_adv.reshape(X_test_adv.shape[0], -1)
        y_test = np.argmax(Y_test, axis=1)
        accuracy = bbox.score(x_test_adv, y_test)

    print("Test adversarial accuracy = {}".format(accuracy))

    log_file = "log/{}.log".format(model_name)
    with open(log_file, "a") as f:
        if args.ae:
            f.write("{}. Test adversarial accuracy = {}\n".format(args.ae, accuracy))
        else:
            f.write("Test adversarial accuracy = {}\n".format(accuracy))
Example #13
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              rng, substitute_model=None, dataset_name=None):
    """This function trains the substitute model as described in
        arxiv.org/abs/1602.02697
    Args:
        sess: TF session
        x: input TF placeholder
        y: output TF placeholder
        bbox_preds: output of black-box model predictions
        X_sub: initial substitute training data
        Y_sub: initial substitute training labels
        nb_classes: number of output classes
        nb_epochs_s: number of epochs to train substitute model
        batch_size: size of training batches
        learning_rate: learning rate for training
        data_aug: number of times substitute training data is augmented
        lmbda: lambda from arxiv.org/abs/1602.02697
        rng: numpy.random.RandomState instance
    
    Returns:
        model_sub: The substitute model function.
        preds_sub: The substitute prediction tensor.
    """

    model_sub = substitute_model
    used_vars = model_sub.get_params()

    if FLAGS.load_sub_model:
        try:
            path = tf.train.latest_checkpoint('classifiers/sub_model/{}'.format(dataset_name))
            saver = tf.train.Saver(var_list=used_vars)
            saver.restore(sess, path)
            print('[+] Sub model loaded successfully ...')

            pred_eval = model_sub.get_logits(x)
            return model_sub, pred_eval

        except:
            pass

    pred_train = model_sub.get_logits(x, dropout=True)
    pred_eval = model_sub.get_logits(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow.
    grads = jacobian_graph(pred_eval, x, nb_classes)

    train_params = {
        'nb_epochs': nb_epochs_s,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': 'classifiers/sub_model/{}'.format(dataset_name),
        'filename': 'model_{}'.format(FLAGS.sub_model)
    }

    # Train the substitute and augment dataset alternatively.
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        model_train(sess, x, y, pred_train, X_sub, convert_to_onehot(Y_sub),
                    init_all=False, args=train_params,
                    rng=rng, save=True)

        # If we are not at last substitute training iteration, augment dataset.
        if rho < data_aug - 1:

            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation.
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box.
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}

            # To initialize the local variables of Defense-GAN.
            sess.run(tf.local_variables_initializer())

            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model.
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, pred_eval
def mnist_blackbox(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_classes=10, batch_size=128,
                   learning_rate=0.001, nb_epochs=10, holdout=150, data_aug=6,
                   nb_epochs_s=10, lmbda=0.1, attack="fgsm", targeted=False):
    """
    MNIST tutorial for the black-box attack from arxiv.org/abs/1602.02697
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :return: a dictionary with:
             * black-box model accuracy on test set
             * substitute model accuracy on test set
             * black-box model accuracy on adversarial examples transferred
               from the substitute model
    """
    keras.layers.core.K.set_learning_phase(0)

    # Dictionary used to keep track and return key accuracies
    accuracies = {}

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session and set as Keras backend session
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    keras.backend.set_session(sess)

    # Get MNIST data
    if DATASET == "mnist":
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
    else:
        X_train, Y_train, X_test, Y_test = data_cifar10()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:holdout]
    Y_sub = np.argmax(Y_test[:holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[holdout:]
    Y_test = Y_test[holdout:]

    X_test = X_test[:FLAGS.n_attack]
    Y_test = Y_test[:FLAGS.n_attack]

    # Define input and output TF placeholders
    if DATASET == "mnist":
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    # for feed targeted attack labels
    t_y = tf.placeholder(tf.float32, shape=(None, 10))

    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    prep_bbox_out = prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
                              nb_epochs, batch_size, learning_rate)
    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train substitute using method from https://arxiv.org/abs/1602.02697
    time_start = time.time()
    print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, X_sub, Y_sub,
                              nb_classes, nb_epochs_s, batch_size,
                              learning_rate, data_aug, lmbda)
    model_sub, preds_sub = train_sub_out
    time_end = time.time()
    print("Substitue model training time:", time_end - time_start)

    # Evaluate the substitute model on clean test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_sub, X_test, Y_test, args=eval_params)
    accuracies['sub'] = acc
    print('Substitute model accuracy:', acc)

    # Find the correctly predicted labels
    original_predict = batch_eval(sess, [x], [bbox_preds], [X_test],
                                  args=eval_params)[0]
    original_class = np.argmax(original_predict, axis=1)
    true_class = np.argmax(Y_test, axis=1)
    mask = true_class == original_class
    print(np.sum(mask), "out of", mask.size, "are correctly labeled,", len(X_test[mask]))

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    wrap = KerasModelWrapper(model_sub)


    # Craft adversarial examples using the substitute
    if targeted and attack == "fgsm":
        # TODO: fix the batch size mess
        eval_params = {'batch_size': FLAGS.n_attack * 9}
    else:
        eval_params = {'batch_size': batch_size}

    adv_inputs = X_test
    ori_labels = Y_test

    # generate targeted labels, 9 for each test example
    if targeted:
        adv_ys = []
        targeted_class = []
        for i in range(0, X_test.shape[0]):
            for j in range(0,10):
                # skip the original image label
                if j == np.argmax(Y_test[i]):
                    continue
                adv_ys.append(np.eye(10)[j])
                targeted_class.append(j)
        # duplicate the inputs by 9 times
        adv_inputs = np.array([[instance] * 9 for instance in X_test],
                              dtype=np.float32)
        if DATASET == "mnist":
            adv_inputs = adv_inputs.reshape((X_test.shape[0] * 9, 28, 28, 1))
        else:
            adv_inputs = adv_inputs.reshape((X_test.shape[0] * 9, 32, 32, 3))
        # also update the mask
        mask = np.repeat(mask, 9)
        ori_labels = np.repeat(Y_test, 9, axis=0)
        adv_ys = np.array(adv_ys, dtype=np.float32)
    
    if attack == "fgsm":
        attacker_params = {'eps': 0.4, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
        # wrap = KerasModelWrapper(model)
        fgsm = FastGradientMethod(wrap, sess=sess)
        attacker = fgsm
        print("Running FGSM attack...")
        if targeted:
            attacker_params['y_target'] = t_y
        x_adv_sub = fgsm.generate(x, **attacker_params)
    else:
        print("Running Carlini and Wagner\'s L2 attack...")
        yname = "y"
        adv_ys = None
        # wrap = KerasModelWrapper(model)
        cwl2 = CarliniWagnerL2(wrap, back='tf', sess=sess)
        attacker_params = {'binary_search_steps': 9,
                     'max_iterations': 2000,
                     'abort_early': True,
                     'learning_rate': 0.01,
                     'batch_size': 1,
                     'initial_const': 0.01,
                     'confidence': 20}
        # generate targeted labels, 9 for each test example
        if targeted:
            attacker_params['y_target'] = adv_ys
            # attacker_params['batch_size'] = 9
        attacker = cwl2

    time_start = time.time()
    if attack == "fgsm":
        # Evaluate the accuracy of the "black-box" model on adversarial examples
        if targeted:
            accuracy = model_eval(sess, x, y, model(x_adv_sub), adv_inputs, ori_labels, feed={t_y: adv_ys},
                                  args=eval_params)
        else:
            accuracy = model_eval(sess, x, y, model(x_adv_sub), adv_inputs, ori_labels,
                                  args=eval_params)
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute: ' + str(accuracy))
        accuracies['bbox_on_sub_adv_ex'] = accuracy
    else:
        # Evaluate the accuracy of the "black-box" model on adversarial examples
        x_adv_sub_np = attacker.generate_np(adv_inputs, **attacker_params)
        accuracy = model_eval(sess, x, y, bbox_preds, x_adv_sub_np, ori_labels,
                              args=eval_params)
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute (NP): ' + str(accuracy))
        accuracies['bbox_on_sub_adv_ex'] = accuracy
    time_end = time.time()
    print('Attack time:', time_end - time_start)

    # Evaluate the targeted attack
    if attack == "fgsm":
        bbox_adv_predict = batch_eval(sess, [x], [model(x_adv_sub)], [adv_inputs], feed={t_y: adv_ys},
                              args=eval_params)[0]
    else:
        bbox_adv_predict = batch_eval(sess, [x], [bbox_preds], [x_adv_sub_np],
                              args=eval_params)[0]
    bbox_adv_class = np.argmax(bbox_adv_predict, axis = 1)
    print(bbox_adv_class)
    print(true_class)
    true_class = np.argmax(ori_labels, axis = 1)
    untargeted_success = np.mean(bbox_adv_class != true_class)
    print('Untargeted attack success rate:', untargeted_success)
    accuracies['untargeted_success'] = untargeted_success
    if targeted:
        targeted_success = np.mean(bbox_adv_class == targeted_class)
        print('Targeted attack success rate:', targeted_success)
        accuracies['targeted_success'] = targeted_success

    if attack == "cwl2":
        # Compute the L2 perturbations of generated adversarial examples
        percent_perturbed = np.sum((x_adv_sub_np - adv_inputs)**2, axis=(1, 2, 3))**.5
        print(percent_perturbed)
        # print('Avg. L_2 norm of perturbations {0:.4f}'.format(np.mean(percent_perturbed)))
        # when computing the mean, removing the failure attacks first
        print('Avg. L_2 norm of all perturbations {0:.4f}'.format(np.mean(percent_perturbed[percent_perturbed > 1e-8])))
        print('Avg. L_2 norm of successful untargeted perturbations {0:.4f}'.format(np.mean(percent_perturbed[bbox_adv_class != true_class])))
        if targeted:
            print('Avg. L_2 norm of successful targeted perturbations {0:.4f}'.format(np.mean(percent_perturbed[bbox_adv_class == targeted_class])))

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess, x, y, bbox_preds, adv_inputs[mask], ori_labels[mask],
                          args=eval_params)
    print('Test accuracy excluding originally incorrect labels (should be 1.0): ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex_exc_ori'] = accuracy

    if attack == "fgsm":
        # Evaluate the accuracy of the "black-box" model on adversarial examples (excluding correct)
        accuracy = model_eval(sess, x, y, model(x_adv_sub), adv_inputs[mask], ori_labels[mask], feed={t_y: adv_ys[mask]},
                              args=eval_params)
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute (excluding originally incorrect labels): ' + str(accuracy))
        accuracies['bbox_on_sub_adv_ex_exc'] = accuracy
    else:
        # Evaluate the accuracy of the "black-box" model on adversarial examples (excluding correct)
        x_adv_sub_mask_np = x_adv_sub_np[mask]
        accuracy = model_eval(sess, x, y, bbox_preds, x_adv_sub_mask_np, ori_labels[mask],
                              args=eval_params)
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute (excluding originally incorrect labels, NP): ' + str(accuracy))
        accuracies['bbox_on_sub_adv_ex_exc'] = accuracy

    return accuracies
y = tf.placeholder(tf.float32, shape=(None, 10))

# Define TF model graph
model = model_mnist()
predictions = model(x)
print("Defined TensorFlow model graph.")

# Train an MNIST model
model_train(sess, x, y, predictions, X_train, Y_train, evaluate=evaluate)

##############################################################################
# Create Adversarials
##############################################################################
# Craft adversarial examples using Fast Gradient Sign Method (FGSM)
adv_x = fgsm(x, predictions, eps=0.2)
X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
assert X_test_adv.shape[0] == 10000, X_test_adv.shape

# Evaluate the accuracy of the MNIST model on adversarial examples
accuracy = model_eval(sess, x, y, predictions, X_test_adv, Y_test)
print('Test accuracy on adversarial examples: ' + str(accuracy))

# save instances
np.save('data/x_train.npy', X_train)
np.save('data/y_train.npy', Y_train)
np.save('data/x_test.npy', X_test)
np.save('data/y_test.npy', Y_test)
np.save('data/adversarials.npy', X_test_adv)

# load instances
X_train = np.load('data/x_train.npy')
Example #16
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes=10,
              nb_epochs_s=250, batch_size=128, learning_rate=0.001, data_aug=6, lmbda=0.1,
              rng=None):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_wrapper = cifar10vgg(empty_model=True)
    model_sub = model_wrapper.model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in range(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(tf.logging.WARNING, "cleverhans.utils.tf"):
            model_train(sess, x, y, preds_sub, X_sub, Y_sub,
                        init_all=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev], args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub)/2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
Example #17
def cifar_blackbox(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_classes=10, batch_size=128,
                   learning_rate=0.001, nb_epochs=50, holdout=150, data_aug=6,
                   nb_epochs_s=50, lmbda=0.1):
    """
    CIFAR tutorial for the black-box attack from arxiv.org/abs/1602.02697
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :return: a dictionary with:
             * black-box model accuracy on test set
             * substitute model accuracy on test set
             * black-box model accuracy on adversarial examples transferred
               from the substitute model
    """
    keras.layers.core.K.set_learning_phase(0)

    # Dictionary used to keep track and return key accuracies
    accuracies = {}

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session and set as Keras backend session
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    keras.backend.set_session(sess)

    # Get CIFAR data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:holdout]
    Y_sub = np.argmax(Y_test[:holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[holdout:]
    Y_test = Y_test[holdout:]

    # Define input and output TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    prep_bbox_out = prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
                              nb_epochs, batch_size, learning_rate)
    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train substitute using method from https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, X_sub, Y_sub,
                              nb_classes, nb_epochs_s, batch_size,
                              learning_rate, data_aug, lmbda)
    model_sub, preds_sub = train_sub_out

    # Evaluate the substitute model on clean test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_sub, X_test, Y_test, args=eval_params)
    accuracies['sub'] = acc
    print('Substitute model accuracy:', acc)

    # Find the correctly predicted labels
    original_predict = batch_eval(sess, [x], [bbox_preds], [X_test],
                                  args=eval_params)[0]
    original_class = np.argmax(original_predict, axis=1)
    true_class = np.argmax(Y_test, axis=1)
    mask = true_class == original_class
    print(np.sum(mask), "out of", mask.size, "are correctly labeled,", len(X_test[mask]))
    
    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {'eps': 0.4, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    wrap = KerasModelWrapper(model_sub)
    fgsm = FastGradientMethod(wrap, sess=sess)

    # Craft adversarial examples using the substitute
    eval_params = {'batch_size': batch_size}
    x_adv_sub = fgsm.generate(x, **fgsm_par)

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess, x, y, model(x_adv_sub), X_test, Y_test,
                          args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex'] = accuracy
    
    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess, x, y, bbox_preds, X_test[mask], Y_test[mask],
                          args=eval_params)
    print('Test accuracy excluding originally incorrect labels: ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex_exc_ori'] = accuracy
    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess, x, y, model(x_adv_sub), X_test[mask], Y_test[mask],
                          args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute (excluding originally incorrect labels): ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex_exc'] = accuracy

    return accuracies
Example #18
def train_sub(sess,
              x,
              y,
              bbox_preds,
              X_sub,
              Y_sub,
              nb_classes,
              nb_epochs_s,
              batch_size,
              learning_rate,
              data_aug,
              lmbda,
              rng,
              substitute_model=None):
    """This function trains the substitute model as described in
        arxiv.org/abs/1602.02697

    Args:
        sess: TF session
        x: input TF placeholder
        y: output TF placeholder
        bbox_preds: output of black-box model predictions
        X_sub: initial substitute training data
        Y_sub: initial substitute training labels
        nb_classes: number of output classes
        nb_epochs_s: number of epochs to train substitute model
        batch_size: size of training batches
        learning_rate: learning rate for training
        data_aug: number of times substitute training data is augmented
        lmbda: lambda from arxiv.org/abs/1602.02697
        rng: numpy.random.RandomState instance
    
    Returns:
        model_sub: The substitute model function.
        preds_sub: The substitute prediction tensor.
    """
    # Define TF model graph (for the black-box model).
    model_sub = substitute_model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow.
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively.
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess,
                    x,
                    y,
                    preds_sub,
                    X_sub,
                    to_categorical(Y_sub),
                    init_all=False,
                    args=train_params,
                    rng=rng,
                    feed={K.learning_phase(): 1})

        # If we are not at last substitute training iteration, augment dataset.
        if rho < data_aug - 1:

            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation.
            X_sub = jacobian_augmentation(sess,
                                          x,
                                          X_sub,
                                          Y_sub,
                                          grads,
                                          lmbda,
                                          feed={K.learning_phase(): 0})

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box.
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}

            # To initialize the local variables of Defense-GAN.
            sess.run(tf.local_variables_initializer())

            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params,
                                  feed={K.learning_phase(): 0})[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model.
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
Example #19
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """
    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print "Created TensorFlow session and set Keras backend."

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print "Loaded MNIST test data."

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print "Defined TensorFlow model graph."

    # Train an MNIST model
    tf_model_train(sess, x, y, predictions, X_train, Y_train)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print 'Test accuracy on legitimate test examples: ' + str(accuracy)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print 'Test accuracy on adversarial examples: ' + str(accuracy)

    print "Repeating the process, using adversarial training"
    # Redefine TF model graph
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess,
                   x,
                   y,
                   predictions_2,
                   X_train,
                   Y_train,
                   predictions_adv=predictions_2_adv)

    # Evaluate the accuracy of the adversarialy trained MNIST model on
    # legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
    print 'Test accuracy on legitimate test examples: ' + str(accuracy)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    X_test_adv_2, = batch_eval(sess, [x], [adv_x_2], [X_test])
    assert X_test_adv_2.shape[0] == 10000, X_test_adv_2.shape

    # Evaluate the accuracy of the adversarially trained MNIST model on
    # adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, X_test_adv_2,
                                 Y_test)
    print 'Test accuracy on adversarial examples: ' + str(accuracy_adv)
Example #20
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train an MNIST model
    tf_model_train(sess,
                   x,
                   y,
                   predictions,
                   X_train,
                   Y_train,
                   evaluate=evaluate)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Evaluate the accuracy of the adversarially trained MNIST model on
        # legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained MNIST model on
        # adversarial examples
        accuracy_adv = tf_model_eval(sess, x, y, predictions_2_adv, X_test,
                                     Y_test)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    tf_model_train(sess,
                   x,
                   y,
                   predictions_2,
                   X_train,
                   Y_train,
                   predictions_adv=predictions_2_adv,
                   evaluate=evaluate_2)
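

# A minimal sketch of the gradient-sign step that the fgsm(x, predictions, eps)
# calls above are expected to perform: move each input by eps in the direction
# of the sign of the loss gradient. Illustrative only; it assumes post-softmax
# probabilities, inputs scaled to [0, 1], and the usual numpy/tensorflow
# imports of the surrounding snippets, while the library helper also exposes
# clipping bounds and other options.
def fgsm_sketch(x, probs, eps=0.3):
    # Use the model's own most-likely class as the label (untargeted attack),
    # so no ground-truth labels are needed at attack time.
    y_hard = tf.stop_gradient(
        tf.one_hot(tf.argmax(probs, axis=1), tf.shape(probs)[1]))
    xent = -tf.reduce_sum(y_hard * tf.log(probs + 1e-12), axis=1)
    grad, = tf.gradients(tf.reduce_mean(xent), x)
    adv_x = tf.stop_gradient(x + eps * tf.sign(grad))
    return tf.clip_by_value(adv_x, 0., 1.)
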
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              aug_batch_size, rng, img_rows=28, img_cols=28,
              nchannels=1):
    """
    This function creates the substitute by alternately
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = ModelSubstitute('model_s', nb_classes)
    preds_sub = model_sub.get_logits(x)
    loss_sub = LossCrossEntropy(model_sub, smoothing=0)

    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment the dataset alternately
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess, loss_sub, x, y, X_sub,
                  to_categorical(Y_sub, nb_classes),
                  init_all=False, args=train_params, rng=rng,
                  var_list=model_sub.get_params())

        # If we are not at the last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda, aug_batch_size)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub)/2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
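

# A rough usage sketch for the substitute returned above: craft FGSM examples
# on the substitute and measure how often they also fool the black-box model
# (the transferability test from arxiv.org/abs/1602.02697). The black-box
# object `bbox_model`, the arrays X_test / Y_test, and the exact
# FastGradientMethod / model_eval signatures are assumptions borrowed from the
# surrounding tutorials, so treat this as illustrative rather than definitive.
model_sub, preds_sub = train_sub(sess, x, y, bbox_preds, X_sub, Y_sub,
                                 nb_classes, nb_epochs_s, batch_size,
                                 learning_rate, data_aug, lmbda,
                                 aug_batch_size, rng)
fgsm_sub = FastGradientMethod(model_sub, sess=sess)
x_adv_sub = fgsm_sub.generate(x, eps=0.3, clip_min=0., clip_max=1.)
eval_params = {'batch_size': batch_size}
accuracy = model_eval(sess, x, y, bbox_model.get_logits(x_adv_sub),
                      X_test, Y_test, args=eval_params)
print('Black-box accuracy on substitute-crafted adversarial examples: ' +
      str(accuracy))
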
Example #22
0
        saved_path += '/wrn-28-10-t--2018-01-23-19-13/ResNet'  # vanilla
    model.load_state(saved_path)

    cost, ev = model.test(ds_test)
    accuracies = [ev['accuracy']]
    for eps in epss[1:]:
        print("Creating adversarial examples...")
        clip_max = (255 - np.max(Cifar10Loader.mean)) / np.max(
            Cifar10Loader.std)
        n_fgsm = fgsm(model.nodes.input,
                      model.nodes.probs,
                      eps=eps,
                      clip_min=-clip_max,
                      clip_max=clip_max)
        images_adv, = batch_eval(model._sess, [model.nodes.input], [n_fgsm],
                                 [ds_test.images[:model.batch_size * 64]],
                                 args={'batch_size': model.batch_size},
                                 feed={model._is_training: False})
        adv_ds_test = Dataset(images_adv, ds_test.labels, ds_test.class_count)
        cost, ev = model.test(adv_ds_test)
        accuracies.append(ev['accuracy'])
    accuracieses.append(accuracies)
    print(accuracies)


def plot(epss, curves, names):
    plt.figure()
    plt.rcParams["mathtext.fontset"] = "cm"
    #plt.yticks(np.arange(0, 1, 0.05))
    axes = plt.gca()
    axes.grid(color='0.9', linestyle='-', linewidth=1)
    axes.set_ylim([0, 1])
top = 10
epss = [0, 0.02, 0.05, 0.2, 0.5, 1]
image_count = 5
batch_size = 64
adv_image_lists = [ds_test.images[:batch_size]]
for eps in epss[1:]:
    print("Creating adversarial examples...")
    clip_max = (255 - np.max(Cifar10Loader.mean)) / np.max(Cifar10Loader.std)
    n_fgsm = fgsm(model.nodes.input,
                  model.nodes.probs,
                  eps=eps,
                  clip_min=-clip_max,
                  clip_max=clip_max)
    images_adv, = batch_eval(model._sess, [model.nodes.input], [n_fgsm],
                             [adv_image_lists[0]],
                             args={'batch_size': batch_size},
                             feed={model._is_training: False})
    adv_image_lists.append(images_adv)


def generate_visualization(i0):
    def get_row(i):
        ims = adv_image_lists[i][i0:i0 + image_count]
        s, m = Cifar10Loader.std, Cifar10Loader.mean
        scale = lambda x: np.clip(x * s + m, 0, 255).astype(np.ubyte)
        return list(map(scale, ims))

    cols = [get_row(i) for i in range(i0, i0 + len(epss))]
    return visualization.compose(cols, format=None)

    images = [im for i in range(i0, i0 + 3) for im in get_row(i)]
def train_sub(sess,
              logits_scalar,
              x,
              y,
              bbox_preds,
              X_sub,
              Y_sub,
              nb_classes,
              nb_epochs_s,
              batch_size,
              learning_rate,
              data_aug,
              lmbda,
              rng,
              binary=False,
              phase=None,
              model_path=None):
    """
    This function creates the substitute by alternately
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :param phase: placeholder for batch_norm phase (training or testing)
    :param phase_val: True if training, False if testing
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    train_params = {
        'binary': False,
        'nb_epochs': nb_epochs_s,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': 'sub_model',
        'train_scope': 'sub_model',
        'reuse_global_step': False,
        'is_training': True
    }

    # Train the substitute and augment the dataset alternately
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))

        if rho > 0:
            train_params.update({'reuse_global_step': True})
        if model_path is not None:
            train_params.update({'log_dir': model_path})
            model_train(sess,
                        x,
                        y,
                        preds_sub,
                        X_sub,
                        to_categorical(Y_sub),
                        phase=phase,
                        save=True,
                        init_all=False,
                        args=train_params,
                        rng=rng)
        else:
            model_train(sess,
                        x,
                        y,
                        preds_sub,
                        X_sub,
                        to_categorical(Y_sub),
                        phase=phase,
                        init_all=False,
                        args=train_params,
                        rng=rng)

        # If we are not at the last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  feed={phase: False},
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
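

# A rough sketch of what the jacobian_augmentation call above does, following
# arxiv.org/abs/1602.02697: nudge each substitute point by lmbda times the
# sign of the gradient of its current label's output component, and append the
# perturbed copies to the dataset (which is then relabeled by the black-box,
# as in the loop above). Illustrative only; it ignores batching and any extra
# feeds such as the `phase` placeholder used elsewhere in this example.
def jacobian_augmentation_sketch(sess, x, X_sub, Y_sub, grads, lmbda):
    X_new = np.zeros_like(X_sub)
    for i, sample in enumerate(X_sub):
        cls = int(Y_sub[i])
        # grads[cls] is the gradient of output component `cls` w.r.t. x,
        # as built by jacobian_graph above.
        grad_val = sess.run(grads[cls], feed_dict={x: sample[None]})
        X_new[i] = sample + lmbda * np.sign(grad_val[0])
    return np.vstack([X_sub, X_new])
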
Example #25
0
top_20_val = [100*feats[k] / y_test.shape[0] for k in top_20]

plt.figure(figsize=(12, 6))
plt.bar(np.arange(20), top_20_val, align='center')
plt.xticks(np.arange(20), X_test.columns[top_20], rotation='vertical')
plt.title('Feature participation in adversarial examples')
plt.ylabel('Percentage (%)')
plt.xlabel('Features')
plt.savefig('Adv_features.png', bbox_inches = "tight")

# Craft adversarial examples using Fast Gradient Sign Method (FGSM)
fgsm = FastGradientMethod(models, sess=sess)
fgsm_params = {'eps': 0.3}
adv_x_f = fgsm.generate(x, **fgsm_params)
# adv_x_f = tf.stop_gradient(adv_x_f)
X_test_adv, = batch_eval(sess, [x], [adv_x_f], [X_test_scaled])

# Evaluate accuracy
eval_par = {'batch_size': FLAGS.batch_size}
accuracy = model_eval(sess, x, y, predictions, X_test_adv, y_test, args=eval_par)
print("Test accuracy on adversarial examples: {}".format(accuracy))

# Comparison of adversarial and original test samples (attack)
feats = dict()
total = 0
orig_attack = X_test_scaled - X_test_adv

for i in range(0, orig_attack.shape[0]):
	ind = np.where(orig_attack[i, :] != 0)[0]
	total += len(ind)
	for j in ind:
Example #26
0
def generate_images():

    print('==> Preparing data..')
    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
            "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    print "==> Beginning Session"

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Load model
    print "==> loading vgg model"
    args = load_args()

    if args.model == 'vgg6': model = vggbn(top=True, pool=args.pool)
    if args.model == 'vgg15': model = vgg15(top=True, pool=args.pool)
    if args.model == 'generic': model = generic(top=True, pool=args.pool)
    if args.model == 'resnet18': model = resnet.build_resnet_18(args.pool)

    predictions = model(x)

    model.load_weights(args.load)

    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args=eval_params)
    print '==> Accuracy : {}'.format(accuracy)

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }

    im_base = '/im_'
    model_name = args.model + '_p' + str(args.pool)
    if args.attack == 'fgsm' or args.attack == 'FGSM':

        result_dir = os.getcwd() + '/images/fgsm/'
        print "==> creating fgsm adversarial wrapper"
        adv_x = fgsm(x, predictions, eps=0.3)

        print "==> sending to batch evaluator to finalize adversarial images"
        eval_params = {'batch_size': FLAGS.batch_size}
        X_train_adv, = batch_eval(sess, [x], [adv_x], [X_train],
                                  args=eval_params)

        i = 0
        if not os.path.exists(result_dir + model_name):
            os.makedirs(result_dir + model_name)
        print "==> saving images to {}".format(result_dir + model_name)
        for ad in X_train_adv:
            scipy.misc.imsave(
                result_dir + model_name + im_base + str(i) + '.png', ad)
            i += 1

        sess.close()
    """ JSMA """
    if args.attack == 'jsma' or args.attack == 'JSMA':

        result_dir = os.getcwd() + '/images/jsma/trial_single_adv'
        print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
              str(FLAGS.nb_classes - 1) + ' adversarial examples')

        results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

        # This array contains the fraction of perturbed features for each test set
        perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                 dtype='f')

        # Define the TF graph for the model's Jacobian
        grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

        # Initialize our array for grid visualization
        grid_shape = (FLAGS.nb_classes, FLAGS.nb_classes, FLAGS.img_rows,
                      FLAGS.img_cols, FLAGS.nb_channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')
        i_saved = 0
        n_image = 0
        # Loop over the samples we want to perturb into adversarial examples
        print "==> saving images to {}".format(result_dir + model_name)
        for sample_ind in xrange(7166, FLAGS.source_samples):
            # We want to find an adversarial example for each possible target class
            current_class = int(np.argmax(Y_train[sample_ind]))
            target_classes = other_classes(FLAGS.nb_classes, current_class)
            # For the grid visualization, keep original images along the diagonal
            grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
                X_train[sample_ind:(sample_ind + 1)],
                (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

            # Loop over all target classes
            adversarials = []
            for idx, target in enumerate(target_classes):
                print "image {}".format(sample_ind)

                # here we hold all successful adversarials for this iteration
                # since we dont want 500k images, we will uniformly sample an image to save after each target

                print('--------------------------------------')
                print('Creating adv. example for target class ' + str(target))

                # This call runs the Jacobian-based saliency map approach
                adv_x, res, percent_perturb = jsma(
                    sess,
                    x,
                    predictions,
                    grads,
                    X_train[sample_ind:(sample_ind + 1)],
                    target,
                    theta=1,
                    gamma=0.1,
                    increase=True,
                    back='tf',
                    clip_min=0,
                    clip_max=1)
                # Display the original and adversarial images side-by-side
                adversarial = np.reshape(
                    adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))
                original = np.reshape(
                    X_train[sample_ind:(sample_ind + 1)],
                    (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

                if FLAGS.viz_enabled:

                    if 'figure' not in vars():
                        figure = pair_visual(original, adversarial)
                    else:
                        figure = pair_visual(original, adversarial, figure)

                if not os.path.exists(result_dir + model_name):
                    os.makedirs(result_dir + model_name)

                if res == 1:
                    adversarials.append(adversarial)

                if idx == FLAGS.nb_classes - 2:

                    try:
                        if len(adversarials) == 1:
                            idx_uniform = 0
                        else:
                            # randint's upper bound is exclusive, so pass
                            # len(adversarials) to sample uniformly over all
                            # of the saved adversarials
                            idx_uniform = np.random.randint(
                                0, len(adversarials))
                        print idx_uniform
                        scipy.misc.imsave(
                            result_dir + model_name + im_base +
                            str(sample_ind) + '.png',
                            adversarials[idx_uniform])
                        i_saved += 1
                        print "==> images saved: {}".format(i_saved)

                    except:

                        print "No adversarials generated"

                # Add our adversarial example to our grid data
                grid_viz_data[target, current_class, :, :, :] = np.reshape(
                    adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

                # Update the arrays for later analysis
                results[target, sample_ind] = res
                perturbations[target, sample_ind] = percent_perturb

            n_image += 1

        # Compute the number of adversarial examples that were successfully found
        nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
        succ_rate = float(np.sum(results)) / nb_targets_tried
        print(
            'Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate))

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(perturbations)
        print('Avg. rate of perturbed features {0:.2f}'.format(
            percent_perturbed))

        # Compute the average distortion introduced for successful samples only
        percent_perturb_succ = np.mean(perturbations * (results == 1))
        print(
            'Avg. rate of perturbed features for successful '
            'adversarial examples {0:.2f}'.format(percent_perturb_succ))

        # Close TF session
        sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if FLAGS.viz_enabled:
            _ = grid_visual(grid_viz_data)
def main(argv=None):
    """
    MNIST cleverhans tutorial
    :return:
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train an MNIST model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }
    model_train(sess,
                x,
                y,
                predictions,
                X_train,
                Y_train,
                evaluate=evaluate,
                args=train_params)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    eval_params = {'batch_size': FLAGS.batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test_adv,
                          Y_test,
                          args=eval_params)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Evaluate the accuracy of the adversarially trained MNIST model on
        # legitimate test examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained MNIST model on
        # adversarial examples
        accuracy_adv = model_eval(sess,
                                  x,
                                  y,
                                  predictions_2_adv,
                                  X_test,
                                  Y_test,
                                  args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    model_train(sess,
                x,
                y,
                predictions_2,
                X_train,
                Y_train,
                predictions_adv=predictions_2_adv,
                evaluate=evaluate_2,
                args=train_params)
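

# A rough sketch of the objective that passing predictions_adv to model_train
# is expected to optimize: an even mix of the cross-entropy on clean inputs
# and on the FGSM inputs regenerated for the model's current parameters at
# every step. Illustrative only; the exact weighting and loss form may differ
# between cleverhans versions.
def adversarial_training_loss(y, preds_clean, preds_adv):
    xent_clean = -tf.reduce_sum(y * tf.log(preds_clean + 1e-12), axis=1)
    xent_adv = -tf.reduce_sum(y * tf.log(preds_adv + 1e-12), axis=1)
    return 0.5 * tf.reduce_mean(xent_clean) + 0.5 * tf.reduce_mean(xent_adv)
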
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model(img_rows=32, img_cols=32, channels=3)
    predictions = model(x)
    print "Defined TensorFlow model graph."

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print 'Test accuracy on legitimate test examples: ' + str(accuracy)

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }
    model_train(sess,
                x,
                y,
                predictions,
                X_train,
                Y_train,
                evaluate=evaluate,
                args=train_params)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    # adv_x = fgsm(x, predictions, eps=0.3)

    mim = MIM(model, back='tf', sess=sess)
    mim_params = {
        'eps_iter': 0.06,
        'eps': 0.3,
        'nb_iter': 10,
        'ord': 2,
        'decay_factor': 1.0
    }

    adv_x = mim.generate(x, **mim_params)

    eval_params = {'batch_size': FLAGS.batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test_adv,
                          Y_test,
                          args=eval_params)
    print 'Test accuracy on adversarial examples: ' + str(accuracy)

    from scipy.misc import imsave
    path = '/home/neale/repos/adversarial-toolbox/images/adversarials/mim/cifar/symmetric/'
    """
    for i, (real, adv) in enumerate(zip(X_test, X_test_adv)):
        imsave(path+'adv/adv_{}.png'.format(i), adv)
    """
    preds = model_argmax(sess, x, predictions, X_test_adv)
    print Y_test.shape
    print preds.shape
    count = 0
    for i in range(len(preds)):
        if np.argmax(Y_test[i]) == preds[i]:
            # imsave(path+'real/im_{}.png'.format(i), X_test[i])
            # imsave(path+'adv/adv_{}.png'.format(i), X_test_adv[i])
            count += 1
    print "saved ", count
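

# A rough sketch of the momentum iterative update behind the MIM attack used
# above: accumulate an L1-normalized gradient with momentum, then take signed
# steps of size eps_iter while staying inside an eps-ball around the original
# input. This is the L-infinity variant for illustration (the call above
# actually requests ord=2), assumes 4-D image inputs, and assumes the true
# one-hot labels Y are available for the loss.
def mim_sketch(sess, x, y, probs, X, Y, eps=0.3, eps_iter=0.06, nb_iter=10,
               decay_factor=1.0):
    xent = -tf.reduce_sum(y * tf.log(probs + 1e-12), axis=1)
    grad_t, = tf.gradients(tf.reduce_mean(xent), x)
    X_adv = X.copy()
    g = np.zeros_like(X)
    for _ in range(nb_iter):
        grad = sess.run(grad_t, feed_dict={x: X_adv, y: Y})
        norm = np.maximum(np.sum(np.abs(grad), axis=(1, 2, 3), keepdims=True),
                          1e-12)
        g = decay_factor * g + grad / norm
        X_adv = np.clip(X_adv + eps_iter * np.sign(g), X - eps, X + eps)
    return X_adv
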
Example #29
0
}
model_train(sess, x, y, predictions, X_train, Y_train, args=train_params)


# Evaluate the MNIST model
eval_params = {'batch_size': 128}
accuracy = model_eval(sess, x, y, predictions, X_test, Y_test, args=eval_params)
print('Test accuracy on legitimate test examples: ' + str(accuracy))
# Test accuracy on legitimate test examples: 0.9888


# Craft adversarial examples using the Fast Gradient Sign Method
from cleverhans.attacks_tf import fgsm
from cleverhans.utils_tf import batch_eval
adv_x = fgsm(x, predictions, eps=0.3)
X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
accuracy = model_eval(sess, x, y, predictions, X_test_adv, Y_test, args=eval_params)
print('Test accuracy on adversarial examples: ' + str(accuracy))
# Test accuracy on adversarial examples: 0.0837


# Adversarial training
model_2 = cnn_model()
predictions_2 = model_2(x)
adv_x_2 = fgsm(x, predictions_2, eps=0.3)
predictions_2_adv = model_2(adv_x_2)
model_train(sess, x, y, predictions_2, X_train, Y_train, predictions_adv=predictions_2_adv, args=train_params)


# Evaluate the accuracy on legitimate examples
accuracy = model_eval(sess, x, y, predictions_2, X_test, Y_test, args=eval_params)
Example #30
0
def calculate_signed_gradient_x(sess, x, predictions, X_test):
    signed_gradient = get_gradient_sign_tf(x, predictions)
    X_test_signed_gradient, = batch_eval(sess, [x], [signed_gradient],
                                         [X_test])
    return X_test_signed_gradient
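

# Typical way the signed gradient returned above is used: a single FGSM step,
# x_adv = x + eps * sign(grad), clipped back to the valid input range. The
# [0, 1] range is an assumption about how the test data was scaled.
def apply_signed_gradient(X_test, X_test_signed_gradient, eps=0.3):
    return np.clip(X_test + eps * X_test_signed_gradient, 0.0, 1.0)
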
Example #31
0
def train_sub(sess,
              x,
              y,
              bbox_preds,
              x_sub,
              y_sub,
              nb_classes,
              nb_epochs_s,
              batch_size,
              learning_rate,
              data_aug,
              lmbda,
              aug_batch_size,
              rng,
              img_rows=28,
              img_cols=28,
              nchannels=1):
    """
    This function creates the substitute by alternately
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param x_sub: initial substitute training data
    :param y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = ModelSubstitute('model_s', nb_classes)
    preds_sub = model_sub.get_logits(x)
    loss_sub = CrossEntropy(model_sub, smoothing=0)

    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment the dataset alternately
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess,
                  loss_sub,
                  x,
                  y,
                  x_sub,
                  to_categorical(y_sub, nb_classes),
                  init_all=False,
                  args=train_params,
                  rng=rng,
                  var_list=model_sub.get_params())

        # If we are not at the last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, grads,
                                          lmbda_coef * lmbda, aug_batch_size)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            y_sub = np.hstack([y_sub, y_sub])
            x_sub_prev = x_sub[int(len(x_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [x_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            y_sub[int(len(x_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
Example #32
0
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model(img_rows=32, img_cols=32, channels=3)
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }
    model_train(sess, x, y, predictions, X_train, Y_train,
                evaluate=evaluate, args=train_params)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    fgsm = FastGradientMethod(model)
    adv_x = fgsm.generate(x, eps=0.3)
    eval_params = {'batch_size': FLAGS.batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the CIFAR10 model on adversarial examples
    accuracy = model_eval(sess, x, y, predictions, X_test_adv, Y_test,
                          args=eval_params)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(img_rows=32, img_cols=32, channels=3)
    predictions_2 = model_2(x)
    fgsm_2 = FastGradientMethod(model_2)
    adv_x_2 = fgsm_2.generate(x, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess, x, y, predictions_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, predictions_2_adv, X_test,
                                  Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    model_train(sess, x, y, predictions_2, X_train, Y_train,
                predictions_adv=predictions_2_adv, evaluate=evaluate_2,
                args=train_params)

    # Evaluate the accuracy of the CIFAR10 model on adversarial examples
    accuracy = model_eval(sess, x, y, predictions_2_adv, X_test, Y_test,
                          args=eval_params)
    print('Test accuracy on adversarial examples: ' + str(accuracy))