Exemple #1
0
def prep_bbox(sess,
              x,
              y,
              x_train,
              y_train,
              x_test,
              y_test,
              nb_epochs,
              batch_size,
              learning_rate,
              rng,
              nb_classes=10,
              img_rows=28,
              img_cols=28,
              nchannels=1):
    """
    Build, train and evaluate the model that plays the role of the
    "remote" black-box oracle described in the original paper.

    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param x_train: the training data for the oracle
    :param y_train: the training labels for the oracle
    :param x_test: the testing data for the oracle
    :param y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training (and evaluation) batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState, forwarded to `train`
    :param nb_classes: number of classes, forwarded to ModelBasicCNN
    :param img_rows: accepted for interface compatibility; not used here
    :param img_cols: accepted for interface compatibility; not used here
    :param nchannels: accepted for interface compatibility; not used here
    :return: tuple `(model, predictions, accuracy)`: the ModelBasicCNN
             instance, the result of `model.get_logits(x)` and the value
             returned by `model_eval` on the test set
    """
    # Black-box model graph: 64 filters, label-smoothed cross-entropy.
    oracle = ModelBasicCNN('model1', nb_classes, 64)
    oracle_loss = CrossEntropy(oracle, smoothing=0.1)
    logits = oracle.get_logits(x)
    print("Defined TensorFlow model graph.")

    # Fit the oracle on the supplied training split.
    train(sess,
          oracle_loss,
          x,
          y,
          x_train,
          y_train,
          args={
              'nb_epochs': nb_epochs,
              'batch_size': batch_size,
              'learning_rate': learning_rate
          },
          rng=rng)

    # Report the accuracy on legitimate (unperturbed) test data.
    test_accuracy = model_eval(sess, x, y, logits, x_test, y_test,
                               args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(test_accuracy))

    return oracle, logits, test_accuracy
Exemple #2
0
class CNNModel:
    """
    Bundle a ModelBasicCNN with its placeholders, logits, loss and the
    session used to train and evaluate it, all built under one variable
    scope.
    """

    def __init__(self, scope):
        """
        :param scope: name passed to `tf.variable_scope` for every graph
                      operation performed by this object
        """
        self.scope = scope
        self.reset()

    def reset(self):
        """
        (Re)create the placeholders, model, logits and loss, and capture
        the current default TF session in `self.sess`.
        """
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            # Whatever session is the default right now is the one used
            # later by train() and test().
            self.sess = tf.get_default_session()
            # Input and label placeholders
            self.x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
            self.y = tf.placeholder(tf.float32, shape=(None, 10))
            # 10 classes, 64 filters
            self.model = ModelBasicCNN('model1', 10, 64)
            self.preds = self.model.get_logits(self.x)
            self.loss = LossCrossEntropy(self.model, smoothing=0.1)

    def train(self, dataset):
        """
        Run the global variable initializer, then train for one epoch on
        `dataset.x` / `dataset.y`.

        :param dataset: object exposing `x` and `y` attributes
        """
        hyper_params = {
            'nb_epochs': 1,
            'batch_size': 32,
            'learning_rate': 1e-2
        }
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            self.sess.run(tf.global_variables_initializer())
            # `train` here is the module-level training helper, not this
            # method.
            train(self.sess, self.loss, self.x, self.y,
                  dataset.x, dataset.y, args=hyper_params)

    def test(self, dataset):
        """
        Evaluate the model on `dataset.x` / `dataset.y` and print the
        result.

        :param dataset: object exposing `x` and `y` attributes
        :return: dict with a single key, 'accuracy'
        """
        batch_args = {'batch_size': 32}
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            score = model_eval(self.sess, self.x, self.y, self.preds,
                               dataset.x, dataset.y, args=batch_args)
            print('Test accuracy on legitimate test examples: {0}'.format(
                score))
            return {'accuracy': score}

    def tf(self):
        """Return the wrapped ModelBasicCNN instance."""
        return self.model
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
              nb_epochs, batch_size, learning_rate,
              rng, nb_classes=10, img_rows=28, img_cols=28, nchannels=1):
    """
    Build, train and evaluate the model that plays the role of the
    "remote" black-box oracle described in the original paper.

    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training (and evaluation) batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState, forwarded to `train`
    :param nb_classes: number of classes, forwarded to ModelBasicCNN
    :param img_rows: accepted for interface compatibility; not used here
    :param img_cols: accepted for interface compatibility; not used here
    :param nchannels: accepted for interface compatibility; not used here
    :return: tuple `(model, predictions, accuracy)`: the ModelBasicCNN
             instance, the result of `model.get_logits(x)` and the value
             returned by `model_eval` on the test set
    """
    # Black-box model graph: 64 filters, label-smoothed cross-entropy.
    oracle = ModelBasicCNN('model1', nb_classes, 64)
    oracle_loss = LossCrossEntropy(oracle, smoothing=0.1)
    logits = oracle.get_logits(x)
    print("Defined TensorFlow model graph.")

    # Fit the oracle on the supplied training split.
    fit_args = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    train(sess, oracle_loss, x, y, X_train, Y_train, args=fit_args, rng=rng)

    # Report the accuracy on legitimate (unperturbed) test data.
    test_accuracy = model_eval(sess, x, y, logits, X_test, Y_test,
                               args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(test_accuracy))

    return oracle, logits, test_accuracy
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA).

    Trains a ModelBasicCNN on the data returned by `data_mnist`, then, for
    each of the first `source_samples` test inputs, crafts one targeted
    adversarial example for every class other than the labelled one and
    prints success and distortion statistics.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object with `clean_train_clean_eval` and
             `clean_train_adv_eval` set
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain image parameters; the number of classes is taken from the
    # width of the label array rather than passed in.
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params,
          rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target).
    # Indexed [target class, sample]; the slot where the target equals the
    # sample's own class is never written and stays 0.
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    # (same layout and same never-written slots as `results`).
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples.
    # The builtin `range` is used so the loop runs on Python 2 and 3 alike.
    for sample_ind in range(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute the fraction of input features that were modified
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x_reshape.shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    nb_success = int(np.sum(results))
    succ_rate = float(nb_success) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm.
    # Divide by the number of attacks actually attempted: `perturbations`
    # also holds one never-written zero per sample (target == true class),
    # so averaging over the whole array would understate the distortion.
    percent_perturbed = float(np.sum(perturbations)) / nb_targets_tried
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only:
    # sum over the successful slots and divide by how many there are
    # (0 when no attack succeeded).
    if nb_success:
        percent_perturb_succ = (
            float(np.sum(perturbations[results == 1])) / nb_success)
    else:
        percent_perturb_succ = 0.
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=64,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial: optionally train a clean model and attack
    it with FGSM, then train a second model adversarially and evaluate it
    on clean and FGSM inputs.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: number of filters, forwarded to ModelBasicCNN
    :param num_threads: when truthy, the session is configured with
                        `intra_op_parallelism_threads=1` (the value itself
                        is not forwarded)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Collects (and finally returns) the key accuracies
    report = AccuracyReport()

    # Fixed TF seed for reproducibility, verbose logging
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Create TF session
    session_kwargs = (dict(intra_op_parallelism_threads=1)
                      if num_threads else {})
    sess = tf.Session(config=tf.ConfigProto(**session_kwargs))

    # Load the requested MNIST slices
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Image geometry and class count are read off the data itself
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Input and label placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Shared hyper-parameters
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds`, store the accuracy on the report under
        `report_key`, and print it unless `is_adv` is None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (kind, acc))

    if clean_train:
        clean_model = ModelBasicCNN('model1', nb_classes, nb_filters)
        clean_logits = clean_model.get_logits(x)
        clean_loss = CrossEntropy(clean_model, smoothing=label_smoothing)

        def eval_clean():
            do_eval(clean_logits, x_test, y_test,
                    'clean_train_clean_eval', False)

        train(sess, clean_loss, x, y, x_train, y_train,
              evaluate=eval_clean,
              args=train_params,
              rng=rng,
              var_list=clean_model.get_params())

        # Calculate training error
        if testing:
            do_eval(clean_logits, x_train, y_train,
                    'train_clean_train_clean_eval')

        # Build the Fast Gradient Sign Method (FGSM) attack graph against
        # the clean model
        clean_fgsm = FastGradientMethod(clean_model, sess=sess)
        clean_adv_x = clean_fgsm.generate(x, **fgsm_params)
        clean_adv_logits = clean_model.get_logits(clean_adv_x)

        # Accuracy of the clean model on adversarial examples
        do_eval(clean_adv_logits, x_test, y_test,
                'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(clean_adv_logits, x_train, y_train,
                    'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    print("after clean train>>>>>>>>>>>>>>>>>>>>>")

    # Second model, trained to be robust to FastGradientMethod
    robust_model = ModelBasicCNN('model2', nb_classes, nb_filters)
    robust_fgsm = FastGradientMethod(robust_model, sess=sess)

    def fgsm_attack(x):
        return robust_fgsm.generate(x, **fgsm_params)

    robust_loss = CrossEntropy(robust_model,
                               smoothing=label_smoothing,
                               attack=fgsm_attack)
    robust_logits = robust_model.get_logits(x)
    robust_adv_x = fgsm_attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        robust_adv_x = tf.stop_gradient(robust_adv_x)
    robust_adv_logits = robust_model.get_logits(robust_adv_x)

    def eval_robust():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(robust_logits, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(robust_adv_logits, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, robust_loss, x, y, x_train, y_train,
          evaluate=eval_robust,
          args=train_params,
          rng=rng,
          var_list=robust_model.get_params())

    # Calculate training errors
    if testing:
        do_eval(robust_logits, x_train, y_train,
                'train_adv_train_clean_eval')
        do_eval(robust_adv_logits, x_train, y_train,
                'train_adv_train_adv_eval')

    return report
Exemple #6
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial, extended with a bank of small "color"
    classifiers.

    The function first trains (or loads from `model_path2`) `numOfPRModel`
    single-layer classifiers that map a scalar intensity to one of the
    overlapping bands in `colorCategory`, and builds a graph that
    re-generates an image from those bands. When `clean_train` is true it
    then trains (or loads) a ModelBasicCNN and evaluates it, passing the
    color-model weights and graph tensors through to `train` /
    `model_eval`.

    NOTE(review): this function reads several names it does not define
    (`file`, `numColorOutput`, `n_input`, `my_tf_round`, `save_dir`,
    `filename`, `save_dir2`, `filename2`, `model_path2`); they are
    presumably module-level definitions -- confirm against the full file.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples; when
                        false only the color models are trained/loaded
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: accepted for interface compatibility;
                                    not referenced in this version
    :param nb_filters: number of filters, forwarded to ModelBasicCNN
    :param num_threads: only used to build `config_args`, which the
                        session created below does not consume
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    # NOTE(review): `config_args` is not passed to the session; the original
    # call is kept below for reference.
    #sess = tf.Session(config=tf.ConfigProto(**config_args))
    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 1}))

    # Get MNIST data
    x_train, y_train, x_test, y_test = data_mnist(file,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    rng = np.random.RandomState([2017, 8, 30])

    ################ color training initialization ####################

    # Overwritten with a random epoch count per model before it is used.
    color_training_epochs = 5000
    color_learning_rate = 0.1
    # Overlapping intensity bands [low, high]; a value in an overlap may be
    # labelled with either band.
    colorCategory = [
        [0.0, 0.4],  # Black
        [0.3, 0.7],  # Grey
        [0.6, 1.0]  # White
    ]

    numOfPRModel = 20
    minColorEpoch = 300
    maxColorEpoch = 3000

    numColorInput = 1
    # NOTE(review): the local definition below is commented out, so
    # `numColorOutput` must be provided by the enclosing module.
    #numColorOutput = len(colorCategory)

    # Scalar intensity inputs, one per row
    color_x = tf.placeholder(
        tf.float32,
        [None, numColorInput])
    # One-hot band labels
    color_y = tf.placeholder(
        tf.float32,
        [None, numColorOutput])

    # Per-model graph pieces, keyed by "<prefix><model index>"
    color_W = {}
    color_b = {}
    color_pred_out = {}
    color_cost = {}
    color_optimizer = {}
    color_argmax = {}
    color_correct_prediction = {}
    color_accuracy = {}
    for i in range(numOfPRModel):
        color_W["w" + str(i)] = tf.Variable(
            tf.random_normal([numColorInput, numColorOutput]))
        color_b["b" + str(i)] = tf.Variable(tf.random_normal([numColorOutput]))
        # Logits; the softmax is applied inside the loss below
        color_pred_out["out" + str(i)] = tf.matmul(
            color_x, color_W["w" + str(i)]) + color_b["b" + str(i)]
        color_cost["cost" + str(i)] = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=color_pred_out["out" + str(i)], labels=color_y))
        # Gradient Descent
        color_optimizer["opt" + str(i)] = tf.train.GradientDescentOptimizer(
            color_learning_rate).minimize(color_cost["cost" + str(i)])

        # Test model
        color_argmax["argmax" + str(i)] = tf.argmax(
            color_pred_out["out" + str(i)], 1)
        color_correct_prediction["pred" + str(i)] = tf.equal(
            tf.argmax(color_pred_out["out" + str(i)], 1),
            tf.argmax(color_y, 1))
        # Calculate accuracy
        color_accuracy["acc" + str(i)] = tf.reduce_mean(
            tf.cast(color_correct_prediction["pred" + str(i)], tf.float32))

    # Graph for re-generating the original image into a new image by using
    # trained color model weights, which are fed in through placeholders.
    pr_model_x = tf.placeholder(
        tf.float32,
        [None, n_input, numColorInput])
    pr_model_W = tf.placeholder(tf.float32,
                                [None, numColorInput, numColorOutput
                                 ])
    pr_model_b = tf.placeholder(tf.float32,
                                [None, numColorInput, numColorOutput
                                 ])
    # One-hot of the winning band along the last axis
    pr_model_output = tf.one_hot(
        tf.argmax((tf.matmul(pr_model_x, pr_model_W) + pr_model_b), 2),
        numColorOutput)

    # Merge the random generated output for new image based on the colorCategory
    randomColorCategory = []
    for i in range(len(colorCategory)):
        tmp = []
        tmpRandomColorCategory = my_tf_round(
            tf.random_uniform(tf.shape(pr_model_x),
                              colorCategory[i][0],
                              colorCategory[i][1],
                              dtype=tf.float32), 2)
        tmp.append(tmpRandomColorCategory)
        randomColorCategory.append(tf.concat(tmp, 1))
    random_merge = tf.reshape(tf.concat(randomColorCategory, -1),
                              [-1, n_input, numColorOutput])
    # The one-hot mask keeps only the random value drawn for the chosen band
    random_color_set = tf.reduce_sum(
        tf.multiply(pr_model_output, random_merge), 2)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    # x = tf.reshape(random_color_set, shape=(-1, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    print(random_color_set)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
        'numColorOutput': numColorOutput
    }
    eval_params = {'batch_size': batch_size, 'numColorOutput': numColorOutput}
    # Float literal so eps is 8/256 even under Python 2 integer division.
    fgsm_params = {'eps': 8.0 / 256, 'clip_min': 0., 'clip_max': 1.}

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                pred2=None,
                c_w=None,
                c_b=None,
                pr_model_x=None,
                random_color_set=None,
                pr_model_W=None,
                pr_model_b=None,
                pr_model_output=None,
                ae=None):
        """Run `model_eval` with the color-model extras, store the accuracy
        on the report under `report_key`, and print it unless `is_adv` is
        None."""
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_set,
                         y_set,
                         args=eval_params,
                         pred2=pred2,
                         c_w=c_w,
                         c_b=c_b,
                         pr_model_x=pr_model_x,
                         random_color_set=random_color_set,
                         pr_model_W=pr_model_W,
                         pr_model_b=pr_model_b,
                         pr_model_output=pr_model_output,
                         is_adv=is_adv,
                         ae=ae)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    with sess.as_default():
        if hasattr(tf, "global_variables_initializer"):
            tf.global_variables_initializer().run()
        else:
            warnings.warn("Update your copy of tensorflow; future versions of "
                          "CleverHans may drop support for this version.")
            sess.run(tf.initialize_all_variables())

        ################# color training ####################
        print("Trying to load pr model from: " + model_path2)
        if os.path.exists(model_path2 + ".meta"):
            tf_model_load(sess, model_path2)
            # Fetching the dicts returns dicts with the same keys
            c_w, c_b = sess.run([color_W, color_b])
            print("Load color trained model in training")
        else:
            # Training the PR model
            c_w = {}
            c_b = {}
            for modelcount in range(numOfPRModel):
                # Each model trains for its own random number of steps
                color_training_epochs = np.random.randint(
                    minColorEpoch, maxColorEpoch)
                for epoch in range(color_training_epochs):
                    outputColorY = []
                    p1 = np.random.random(100)
                    for i in range(len(p1)):
                        # Collect every band that contains p1[i]
                        outputOverlapColorY = []
                        for j in range(len(colorCategory)):
                            if p1[i] >= colorCategory[j][0] and p1[
                                    i] <= colorCategory[j][1]:
                                colorIndexSeq = [
                                    1 if j == k else 0
                                    for k in range(len(colorCategory))
                                ]
                                outputOverlapColorY.append(colorIndexSeq)

                        # Randomly choose the output for color Y if the outputOverlapColorY has more than 1 item
                        outputColorY.append(
                            outputOverlapColorY[np.random.randint(
                                0, len(outputOverlapColorY))])

                    inputColorX = p1.reshape(100, 1)
                    _, c, _c_w, _c_b = sess.run([
                        color_optimizer["opt" + str(modelcount)],
                        color_cost["cost" + str(modelcount)],
                        color_W["w" + str(modelcount)],
                        color_b["b" + str(modelcount)]
                    ],
                                                feed_dict={
                                                    color_x: inputColorX,
                                                    color_y: outputColorY
                                                })
                    avg_cost = c

                    # Evaluating color model
                    outputColorY = []
                    p1 = np.random.random(100)
                    # Generate output for random color inputs (test case);
                    # here the first matching band wins.
                    for i in range(len(p1)):
                        for j in range(len(colorCategory)):
                            outputOverlapColorY = []
                            if p1[i] >= colorCategory[j][0] and p1[
                                    i] <= colorCategory[j][1]:
                                colorIndexSeq = [
                                    1 if j == k else 0
                                    for k in range(len(colorCategory))
                                ]
                                outputOverlapColorY.append(colorIndexSeq)
                                break

                        # Randomly choose the output for color Y if the outputOverlapColorY has more than 1 item
                        outputColorY.append(
                            outputOverlapColorY[np.random.randint(
                                0, len(outputOverlapColorY))])

                    inputColorX = p1.reshape(100, 1)
                    acc, argmax = sess.run([
                        color_accuracy["acc" + str(modelcount)],
                        color_argmax["argmax" + str(modelcount)]
                    ],
                                           feed_dict={
                                               color_x: inputColorX,
                                               color_y: outputColorY
                                           })
                # Printed once per model, with the last step's values
                print(str(modelcount + 1) + ") Epoch:",
                          '%04d' % (epoch + 1) + "/" + str(color_training_epochs) + ", Cost= " + \
                          "{:.9f}".format(avg_cost) + ", Training Accuracy= " + \
                          "{:.5f}".format(acc) + " ")

                # Weights fetched in the same run as the final optimizer step
                c_w["w" + str(modelcount)] = _c_w
                c_b["b" + str(modelcount)] = _c_b

            save_path = os.path.join(save_dir2, filename2)
            saver = tf.train.Saver()
            saver.save(sess, save_path)
        ##################### end of color training ------------------------------

    ################# model training ####################
    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=label_smoothing)

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        saveFileNum = 50
        # saveFileNum = 500
        # saveFileNum = 1000
        model_path = os.path.join(save_dir, filename + "-" + str(saveFileNum))
        fgsm = FastGradientMethod(model)
        # fgsm = BasicIterativeMethod(model)
        # fgsm = MomentumIterativeMethod(model)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        def evaluate():
            do_eval(preds,
                    x_test,
                    y_test,
                    'clean_train_clean_eval',
                    False,
                    pred2=preds,
                    c_w=c_w,
                    c_b=c_b,
                    pr_model_x=pr_model_x,
                    random_color_set=random_color_set,
                    pr_model_W=pr_model_W,
                    pr_model_b=pr_model_b)
            #do_eval(preds, x_test, y_test, 'clean_train_adv_eval', True,
            #pred2=preds, c_w=c_w, c_b=c_b, ae=adv_x,
            #pr_model_x=pr_model_x, random_color_set=random_color_set,
            #pr_model_W=pr_model_W, pr_model_b=pr_model_b, pr_model_output=pr_model_output
            #)

        print("Trying to load trained model from: " + model_path)
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
            print("Load trained model")
        else:
            train(sess,
                  loss,
                  x,
                  y,
                  x_train,
                  y_train,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params(),
                  save=True,
                  c_w=c_w,
                  c_b=c_b,
                  pr_model_x=pr_model_x,
                  random_color_set=random_color_set,
                  pr_model_W=pr_model_W,
                  pr_model_b=pr_model_b)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Evaluate the accuracy of the MNIST model on adversarial examples;
        # the adversarial tensor is handed to model_eval through `ae`.
        do_eval(preds,
                x_test,
                y_test,
                'clean_train_adv_eval',
                True,
                pred2=preds,
                c_w=c_w,
                c_b=c_b,
                ae=adv_x,
                pr_model_x=pr_model_x,
                random_color_set=random_color_set,
                pr_model_W=pr_model_W,
                pr_model_b=pr_model_b)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    # Hand the collected accuracies back, as the docstring promises.
    return report
def mnist_tutorial(train_start=0,
                   train_end=1000,
                   test_start=0,
                   test_end=1666,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None):
    """
    Train a basic CNN on MNIST and evaluate it against an FGSM attack.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, train on clean examples and run the attack;
                        if false, nothing beyond the setup is executed
    :param testing: if true, also record accuracies on the training set
    :param backprop_through_attack: accepted for interface compatibility;
                                    not read by this function
    :param nb_filters: filter count handed to ModelBasicCNN
    :param num_threads: if truthy, the session is configured with
                        intra_op_parallelism_threads=1
    :return: the AccuracyReport filled in by the evaluations that ran
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create the single TF session used for the whole run. It must not be
    # replaced by a second, default-configured session later on: that would
    # drop the thread configuration chosen here and leave this one open.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """
        Evaluate `preds` on (x_set, y_set) and store the accuracy on the
        report under `report_key`.

        :param is_adv: None prints nothing; True/False print the accuracy
                       labelled 'adversarial' / 'legitimate'
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', 10, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=0.1)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x,
              y,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)
        print('adv_x shape:', adv_x.shape)

        # Show the model output for the adversarial version of the first
        # test image. These are the values of get_logits, i.e. raw logits.
        adv_logits = sess.run(preds_adv, {x: x_test[:1]})
        print(adv_logits)
        print('original answer :', y_test[:1])

        # Evaluate the accuracy of the MNIST model on adversarial examples
        # (only the first test example is used here)
        do_eval(preds_adv, x_test[:1], y_test[:1], 'clean_train_adv_eval',
                True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    return report
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples;
                        requires source_samples to equal the number of
                        classes
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: value given to the attack as 'max_iterations'
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    # Everything that needs the session runs inside try/finally so that the
    # session is closed even when training, the attack or an assert fails.
    try:
        set_log_level(logging.DEBUG)

        # Get MNIST test data
        x_train, y_train, x_test, y_test = data_mnist(
            train_start=train_start,
            train_end=train_end,
            test_start=test_start,
            test_end=test_end)

        # Obtain Image Parameters
        img_rows, img_cols, nchannels = x_train.shape[1:4]
        nb_classes = y_train.shape[1]

        # Define input TF placeholder
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))
        nb_filters = 64

        # Define TF model graph
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=0.1)
        print("Defined TensorFlow model graph.")

        #######################################################################
        # Training the model using TensorFlow
        #######################################################################

        # Train an MNIST model; model_path is split into its directory and
        # file-name parts for the trainer.
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': os.path.dirname(model_path),
            'filename': os.path.basename(model_path)
        }

        rng = np.random.RandomState([2017, 8, 30])
        # check if we've trained before, and if we have, use that pre-trained
        # model
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
        else:
            # NOTE(review): saving is keyed on a literal "models" directory
            # rather than on the directory of model_path -- confirm that this
            # is intended for non-default model paths.
            train(sess, loss, x, y, x_train, y_train, args=train_params,
                  save=os.path.exists("models"), rng=rng)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                              args=eval_params)
        assert x_test.shape[0] == test_end - test_start, x_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(
            accuracy))
        report.clean_train_clean_eval = accuracy

        #######################################################################
        # Craft adversarial examples using Carlini and Wagner's approach
        #######################################################################
        nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
        print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
              ' adversarial examples')
        print("This could take some time ...")

        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, back='tf', sess=sess)

        # Pick the clean inputs to attack together with their true labels.
        if viz_enabled:
            # The grid shows one row per class, so use the first test
            # example of every class.
            assert source_samples == nb_classes
            idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                    for i in range(nb_classes)]
            sources = x_test[idxs]
            source_labels = y_test[idxs]

            # Initialize our array for grid visualization: one column per
            # target class when targeted, else (original, adversarial).
            grid_shape = (nb_classes, nb_classes if targeted else 2,
                          img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
        else:
            sources = x_test[:source_samples]
            source_labels = y_test[:source_samples]

        if targeted:
            # Repeat every source once per target class and pair the copies
            # with the one-hot encodings of all classes.
            adv_inputs = np.array(
                [[instance] * nb_classes for instance in sources],
                dtype=np.float32)
            adv_inputs = adv_inputs.reshape(
                (source_samples * nb_classes, img_rows, img_cols, nchannels))

            one_hot = np.eye(nb_classes)
            adv_ys = np.array([one_hot] * source_samples,
                              dtype=np.float32).reshape(
                                  (source_samples * nb_classes, nb_classes))
            yname = "y_target"
        else:
            adv_inputs = sources
            adv_ys = None
            yname = "y"

        cw_params = {'binary_search_steps': 1,
                     yname: adv_ys,
                     'max_iterations': attack_iterations,
                     'learning_rate': 0.1,
                     'batch_size': source_samples * nb_classes if
                     targeted else source_samples,
                     'initial_const': 10}

        adv = cw.generate_np(adv_inputs,
                             **cw_params)

        eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        if targeted:
            # Success = the model outputs the requested target class.
            adv_accuracy = model_eval(
                sess, x, y, preds, adv, adv_ys, args=eval_params)
        else:
            # Success = the model no longer outputs the true class.
            adv_accuracy = 1 - model_eval(
                sess, x, y, preds, adv, source_labels, args=eval_params)

        if viz_enabled:
            for j in range(nb_classes):
                if targeted:
                    for i in range(nb_classes):
                        grid_viz_data[i, j] = adv[i * nb_classes + j]
                else:
                    grid_viz_data[j, 0] = adv_inputs[j]
                    grid_viz_data[j, 1] = adv[j]

            print(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully
        # found
        print('Avg. rate of successful adv. examples {0:.4f}'.format(
            adv_accuracy))
        report.clean_train_adv_eval = 1. - adv_accuracy

        # Compute the average distortion introduced by the algorithm
        mean_l2_distortion = np.mean(np.sum((adv - adv_inputs)**2,
                                            axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            mean_l2_distortion))
    finally:
        # Close TF session
        sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
Exemple #9
0
def zoo(viz_enabled=VIZ_ENABLED,
        nb_epochs=NB_EPOCHS,
        batch_size=BATCH_SIZE,
        source_samples=SOURCE_SAMPLES,
        learning_rate=LEARNING_RATE,
        attack_iterations=ATTACK_ITERATIONS,
        model_path=MODEL_PATH,
        targeted=TARGETED):
    """
    Train (or load) a basic CNN on the dataset named by the module-level
    DATASET constant and attack it with Zoo.
    :param viz_enabled: (boolean) activate plots of adversarial examples;
                        requires source_samples to equal the number of
                        classes
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: value given to the attack as 'max_iterations'
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :raises ValueError: if DATASET is not 'MNIST', 'SVHN' or 'CIFAR10'
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    # Everything that needs the session runs inside try/finally so that the
    # session is closed even when training, the attack or an assert fails.
    try:
        set_log_level(logging.DEBUG)

        train_start = 0
        test_start = 0
        if DATASET == 'MNIST':
            train_end = 60000
            test_end = 10000
            ds = dataset.MNIST(train_start=train_start,
                               train_end=train_end,
                               test_start=test_start,
                               test_end=test_end,
                               center=False)
        elif DATASET == 'SVHN':
            train_end = 73257
            test_end = 26032
            ds = dataset.SVHN(train_start=train_start,
                              train_end=train_end,
                              test_start=test_start,
                              test_end=test_end)
        elif DATASET == 'CIFAR10':
            train_end = 60000
            test_end = 10000
            ds = dataset.CIFAR10(train_start=train_start,
                                 train_end=train_end,
                                 test_start=test_start,
                                 test_end=test_end,
                                 center=False)
        else:
            # Fail here with a clear message instead of a NameError on `ds`
            # further down.
            raise ValueError(
                'Unsupported DATASET %r: expected MNIST, SVHN or CIFAR10'
                % (DATASET,))

        x_train, y_train, x_test, y_test = (ds.get_set('train') +
                                            ds.get_set('test'))

        # Obtain Image Parameters
        img_rows, img_cols, nchannels = x_train.shape[1:4]
        nb_classes = y_train.shape[1]

        # Define input TF placeholder
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))
        nb_filters = 64

        # Define TF model graph
        model = ModelBasicCNN(DATASET, nb_classes, nb_filters,
                              (None, img_rows, img_cols, nchannels))
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)
        print("Defined TensorFlow model graph.")

        #######################################################################
        # Training the model using TensorFlow
        #######################################################################

        # Train a model
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'filename': os.path.basename(model_path)
        }

        rng = np.random.RandomState([2018, 10, 22])
        # check if we've trained before, and if we have, use that pre-trained
        # model
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
        else:
            train(sess, loss, x, y, x_train, y_train, args=train_params,
                  rng=rng)
            # Make sure the checkpoint directory exists so that the finished
            # training run is not lost to a failing save.
            save_dir = os.path.dirname(model_path)
            if save_dir and not os.path.isdir(save_dir):
                os.makedirs(save_dir)
            saver = tf.train.Saver()
            saver.save(sess, model_path)

        # Evaluate the accuracy of the model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                              args=eval_params)
        assert x_test.shape[0] == test_end - test_start, x_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(
            accuracy))
        report.clean_train_clean_eval = accuracy

        #######################################################################
        # Craft adversarial examples using the Zoo attack
        #######################################################################
        nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
        print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
              ' adversarial examples')
        print("This could take some time ...")

        # Instantiate a Zoo attack object (named `attack` so that it does
        # not shadow this function)
        attack = Zoo(model, sess=sess)

        # Pick the clean inputs to attack together with their true labels.
        if viz_enabled:
            # The grid shows one row per class, so use the first test
            # example of every class.
            assert source_samples == nb_classes
            idxs = [
                np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            sources = x_test[idxs]
            source_labels = y_test[idxs]

            # Initialize our array for grid visualization: one column per
            # target class when targeted, else (original, adversarial).
            grid_shape = (nb_classes, nb_classes if targeted else 2,
                          img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
        else:
            sources = x_test[:source_samples]
            source_labels = y_test[:source_samples]

        if targeted:
            # Repeat every source once per target class and pair the copies
            # with the one-hot encodings of all classes.
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in sources],
                                  dtype=np.float32)
            adv_inputs = adv_inputs.reshape(
                (source_samples * nb_classes, img_rows, img_cols, nchannels))

            one_hot = np.eye(nb_classes)
            adv_ys = np.array([one_hot] * source_samples,
                              dtype=np.float32).reshape(
                                  (source_samples * nb_classes, nb_classes))
            yname = "y_target"
        else:
            adv_inputs = sources
            adv_ys = None
            yname = "y"

        zoo_params = {
            'binary_search_steps': BINARY_SEARCH_STEPS,
            yname: adv_ys,
            'max_iterations': attack_iterations,
            'learning_rate': ZOO_LEARNING_RATE,
            'batch_size':
            source_samples * nb_classes if targeted else source_samples,
            'initial_const': INIT_CONST,
            'solver': SOLVER,
            'image_shape': [img_rows, img_cols, nchannels],
            'nb_classes': nb_classes
        }

        adv = attack.generate_np(adv_inputs, **zoo_params)

        eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        if targeted:
            # Success = the model outputs the requested target class.
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv,
                                      adv_ys,
                                      args=eval_params)
        else:
            # Success = the model no longer outputs the true class.
            adv_accuracy = 1 - model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          adv,
                                          source_labels,
                                          args=eval_params)

        if viz_enabled:
            for j in range(nb_classes):
                if targeted:
                    for i in range(nb_classes):
                        grid_viz_data[i, j] = adv[i * nb_classes + j]
                else:
                    grid_viz_data[j, 0] = adv_inputs[j]
                    grid_viz_data[j, 1] = adv[j]

            print(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully
        # found
        print('Avg. rate of successful adv. examples {0:.4f}'.format(
            adv_accuracy))
        report.clean_train_adv_eval = 1. - adv_accuracy

        # Compute the average distortion introduced by the algorithm
        mean_l2_distortion = np.mean(
            np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            mean_l2_distortion))
    finally:
        # Close TF session
        sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
Exemple #10
0
def simulate_jsma():
    """
    Interactively craft one targeted JSMA adversarial MNIST image.

    Restores a ModelBasicCNN from a hard-coded checkpoint path, reads a
    sample class and a target class from stdin, and runs SaliencyMapMethod
    on the test image selected by get_mnist_idx for the sample class, using
    the label of the image selected for the target class as 'y_target'.
    The softmax of the model's logits for the adversarial image is printed
    and the image is written to "res.png" in the current directory.

    If either class is outside 0-9, 'input is wrong' is printed and the
    function returns without running the attack or writing a file.

    :return: None
    """
    # Get MNIST test data
    x_test, y_test = get_mnist_data()

    # Define input TF placeholders (MNIST: 28x28x1 images, 10 classes).
    # `y` is not fed anywhere in this function.
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = ModelBasicCNN('model1', 10, 64)
    preds = model.get_logits(x)
    print("Defined TensorFlow model graph.")

    ##################################
    #          Load Model            #
    ##################################
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    # The context manager closes the session on every exit path, so no
    # explicit sess.close() is needed afterwards.
    with tf.Session() as sess:
        sess.run(init_op)
        save_path = r'C:\Users\hhkim\Desktop\scc\cleverhans\cleverhans_tutorials\model_data\train_model'
        saver.restore(sess, save_path)

        sample_class = int(input('input sample class(0-9): '))
        target_class = int(input('input target class(0-9): '))

        # Stop here on out-of-range classes instead of carrying on with
        # values the rest of the function cannot use.
        if not (0 <= sample_class <= 9 and 0 <= target_class <= 9):
            print('input is wrong')
            return

        sample_idx = get_mnist_idx(y_test, sample_class)
        target_idx = get_mnist_idx(y_test, target_class)

        # Slices (not plain indexing) keep the leading batch dimension.
        sample = x_test[sample_idx:sample_idx + 1]

        # Instantiate a SaliencyMapMethod attack object
        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        jsma_params = {'theta': 1., 'gamma': 0.1,
                       'clip_min': 0., 'clip_max': 1.,
                       'y_target': y_test[target_idx:target_idx + 1]}
        adv_x = jsma.generate_np(sample, **jsma_params)

        print('sample class:', np.argmax(y_test[sample_idx]))
        print('target class:', np.argmax(y_test[target_idx]))

        # Model output (logits) for the adversarial image
        adv_logits = sess.run(preds, {x: adv_x})

        def softmax(logits):
            # Subtracting the maximum first keeps np.exp from overflowing.
            e_x = np.exp(logits - np.max(logits))
            return e_x / e_x.sum()

        print('==========================================')
        print(softmax(adv_logits))
        print('==========================================')

        print('{} class is recognized by {} '.format(sample_class, target_class))

    # Save the adversarial image: scale by 255 and store as 8-bit RGB.
    two_d_img = (np.reshape(adv_x, (28, 28)) * 255).astype(np.uint8)
    from PIL import Image
    save_image = Image.fromarray(two_d_img)
    save_image = save_image.convert('RGB')
    save_image.save("res.png")
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    #x##########################################################
    #Tensor("Placeholder:0", shape=(?, 28, 28, 1), dtype=float32)
    #<class 'tensorflow.python.framework.ops.Tensor'>
    ###########################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    if clean_train:
        model = ModelBasicCNN(
            'model1', nb_classes, nb_filters
        )  # <cleverhans_tutorials.tutorial_models.ModelBasicCNN object at 0x7f81feaae240>

        preds = model.get_logits(
            x
        )  # Tensor("model1_1/dense/BiasAdd:0", shape=(?, 10), dtype=float32)

        loss = CrossEntropy(
            model, smoothing=label_smoothing
        )  # <cleverhans.loss.CrossEntropy object at 0x7f819466b470>

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph

        fgsm = ProjectedGradientDescent(
            model, sess=sess
        )  # TODO                     # <cleverhans.attacks.FastGradientMethod object at 0x7feabc77ce80>
        start = time.time()
        adv_x = fgsm.generate(
            x, **fgsm_params
        )  # Tensor("Identity_1:0", shape=(?, 28, 28, 1), dtype=float32)

        #imagetest = np.squeeze(adv_x)
        #plt.imshow(imagetest)

        preds_adv = model.get_logits(
            adv_x
        )  # Tensor("model1_5/dense/BiasAdd:0", shape=(?, 10), dtype=float32)
        end = time.time()
        a = end - start
        print("Attack time = ")
        print(a)
        print("")

        #Tensor("Identity_1:0", shape=(?, 28, 28, 1), dtype=float32)
        #Tensor("model1_5/dense/BiasAdd:0", shape=(?, 10), dtype=float32)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        start = time.time()
        acc_result = do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval',
                             True)
        end = time.time()

        b = end - start

        print("")
        print("Inference function time = ")
        print(b)
        print("")

        values = [b, acc_result * 100, 0, 0, 0]
        x_labels = [
            'Time(s)', 'Accuracy(%)', '', 'Method2 Time(s)',
            'Method2 Accuracy(%)'
        ]
        plt.bar(x_labels, values)
        plt.show()

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print("END!")
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64, num_threads=None,
                   label_smoothing=True):
    """
    MNIST cleverhans tutorial

    When ``clean_train`` is true, first trains a CNN on clean examples and
    evaluates it on clean and FGSM examples. A second CNN is then always
    trained adversarially against FGSM and evaluated the same way. Every
    measured accuracy is stored as an attribute of the returned report.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count handed to ModelBasicCNN
    :param num_threads: if truthy, the session is configured with
                        intra_op_parallelism_threads=1
    :param label_smoothing: if true, clip the training labels to
                            [0.1 / (nb_classes - 1), 0.9] before training
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create the one TF session used for all training and evaluation below.
    # NOTE(review): any truthy num_threads pins intra-op parallelism to 1
    # rather than to num_threads itself; kept as-is, confirm the intent.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    if label_smoothing:
        label_smooth = .1
        y_train = y_train.clip(label_smooth /
                               (nb_classes-1), 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }
    rng = np.random.RandomState([2017, 8, 30])
    # The session configured above is reused here. Creating a second, bare
    # tf.Session() at this point would leak the first one and silently drop
    # the num_threads configuration.

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Evaluate `preds` on the given set, store the accuracy on the
        # report under `report_key`, and print it unless is_adv is None.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=0.1)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = LossCrossEntropy(model2, smoothing=0.1, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, x, y, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng, var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
Exemple #13
0
def mnist_tutorial_cw(nb_classes=10, attack_iterations=100, targeted=True,
                      save_path=None):
    """
    Craft one Carlini-Wagner L2 adversarial MNIST example against a
    previously saved model and write it to "cw_attack_res.png".

    The source and target digit classes are read interactively from stdin.
    The logits of the restored model on the adversarial image are printed.

    :param nb_classes: number of output classes (sizes the label
                       placeholder and appears in the progress message)
    :param attack_iterations: max_iterations handed to the attack
    :param targeted: if true, attack towards the label of the chosen target
                     example; otherwise run an untargeted attack
    :param save_path: checkpoint prefix to restore the model from; when
                      None, the original hard-coded location is used
    :return: None
    """
    if save_path is None:
        save_path = r'C:\Users\hhkim\Desktop\scc\cleverhans\cleverhans_tutorials\model_data\train_model'

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    sample_class = int(input('input sample class(0-9): '))
    target_class = int(input('input target class(0-9): '))

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # A single session is used for the whole run; the context manager closes
    # it on exit, so no explicit sess.close() is needed and nothing leaks.
    with tf.Session() as sess:
        print("Created TensorFlow session.")

        set_log_level(logging.DEBUG)

        # Get MNIST test data
        x_test, y_test = get_mnist_data()
        # presumably the index of a test example of the requested class;
        # get_mnist_idx is defined elsewhere - verify
        sample_idx = get_mnist_idx(y_test, sample_class)
        target_idx = get_mnist_idx(y_test, target_class)

        # Define input TF placeholder
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, channels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        # Define TF model graph
        model = ModelBasicCNN('model1', 10, 64)
        preds = model.get_logits(x)
        print("Defined TensorFlow model graph.")

        ##################################
        #          Load Model            #
        ##################################
        saver = tf.train.Saver()
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver.restore(sess, save_path)

        #######################################################################
        # Craft adversarial examples using Carlini and Wagner's approach
        #######################################################################
        nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
        print('Crafting ' + str(1) + ' * ' + nb_adv_per_sample +
              ' adversarial examples')

        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, back='tf', sess=sess)

        # Exactly one source image is attacked in either mode.
        adv_input = x_test[sample_idx:sample_idx + 1]
        if targeted:
            adv_ys = y_test[target_idx:target_idx + 1]
            yname = "y_target"
        else:
            adv_ys = None
            yname = "y"

        cw_params = {
            'binary_search_steps': 1,
            yname: adv_ys,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': 1,
            'initial_const': 10
        }

        adv = cw.generate_np(adv_input, **cw_params)

        # Print the model's logits for the adversarial image
        probabilities = sess.run(preds, {x: adv})
        print(probabilities)

        # Save the adversarial image as an 8-bit RGB PNG
        two_d_img = (np.reshape(adv, (28, 28)) * 255).astype(np.uint8)
        from PIL import Image
        save_image = Image.fromarray(two_d_img)
        save_image = save_image.convert('RGB')
        save_image.save("cw_attack_res.png")
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack, run against a CNN whose
    input is first re-generated by a small trained "color" model.

    This function reads several names it does not define itself, so they must
    be provided by the enclosing module (or builtins): ``file``,
    ``numColorOutput``, ``n_input``, ``save_dir``, ``filename``,
    ``model_path2``, ``save_dir2``, ``filename2`` and ``my_tf_round``.
    It also passes extra keyword arguments (``c_w``, ``c_b``, ``pr_model_x``,
    ...) to ``train`` and ``model_eval``, so those must be the variants that
    accept them.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: max_iterations handed to the attack
    :param model_path: path to the model file. NOTE: the value passed in is
                       never read; it is overwritten from save_dir, filename
                       and saveFileNum before its first use.
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    # NOTE(review): `file` is not defined in this function; presumably a
    # module-level dataset path - confirm.
    x_train, y_train, x_test, y_test = data_mnist(file,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    nb_filters = 64

    ################ color training initialization ####################

    color_training_epochs = 5000
    color_learning_rate = 0.1
    # Intensity intervals per color category. Neighbouring intervals overlap
    # (0.3-0.4 and 0.6-0.7), so a value can belong to two categories.
    colorCategory = [
        [0.0, 0.4],  # Black
        [0.3, 0.7],  # Grey
        [0.6, 1.0]  # White
    ]

    numColorInput = 1

    color_x = tf.placeholder(
        tf.float32,
        [None, numColorInput])  # one scalar intensity per row
    color_y = tf.placeholder(
        tf.float32,
        [None, numColorOutput])  # one-hot color category (numColorOutput is defined outside this function)

    # Set model weights
    color_W = tf.Variable(tf.zeros([numColorInput, numColorOutput]))
    color_b = tf.Variable(tf.zeros([numColorOutput]))
    color_pred_out = tf.matmul(color_x, color_W) + color_b  # logits; softmax is applied inside the loss

    color_cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=color_pred_out,
                                                labels=color_y))
    # Gradient Descent
    color_optimizer = tf.train.GradientDescentOptimizer(
        color_learning_rate).minimize(color_cost)

    # Test model
    color_argmax = tf.argmax(color_pred_out, 1)
    color_correct_prediction = tf.equal(tf.argmax(color_pred_out, 1),
                                        tf.argmax(color_y, 1))
    # Calculate accuracy
    color_accuracy = tf.reduce_mean(
        tf.cast(color_correct_prediction, tf.float32))

    # Graph for re-generating the original image into a new image by using trained color model
    pr_model_x = tf.placeholder(
        tf.float32,
        [None, n_input, numColorInput])  # presumably one row per image with n_input pixel intensities - confirm
    pr_model_W = tf.placeholder(tf.float32,
                                [None, numColorInput, numColorOutput
                                 ])  # presumably fed with the trained color weights (c_w) by train/model_eval - confirm
    pr_model_b = tf.placeholder(tf.float32,
                                [None, numColorInput, numColorOutput
                                 ])  # presumably fed with the trained color bias (c_b) by train/model_eval - confirm
    # One-hot encoding of the highest-scoring color category along axis 2.
    pr_model_output = tf.one_hot(
        tf.argmax((tf.matmul(pr_model_x, pr_model_W) + pr_model_b), 2),
        numColorOutput)

    # Merge the random generated output for new image based on the colorCategory
    # For every category, draw uniform random values inside that category's
    # interval, shaped like pr_model_x, and pass them through my_tf_round
    # (helper defined elsewhere; presumably rounds to 2 decimals - confirm).
    randomColorCategory = []
    for i in range(len(colorCategory)):
        # `tmp` only ever holds one tensor, so the concat below is a no-op wrap.
        tmp = []
        tmpRandomColorCategory = my_tf_round(
            tf.random_uniform(tf.shape(pr_model_x),
                              colorCategory[i][0],
                              colorCategory[i][1],
                              dtype=tf.float32), 2)
        tmp.append(tmpRandomColorCategory)
        randomColorCategory.append(tf.concat(tmp, 1))

    random_merge = tf.reshape(tf.concat(randomColorCategory, -1),
                              [-1, n_input, numColorOutput])
    # Multiplying by the one-hot mask and summing over axis 2 keeps only the
    # random value drawn for the predicted category.
    random_color_set = tf.reduce_sum(
        tf.multiply(pr_model_output, random_merge), 2)

    # Define input TF placeholder
    # NOTE(review): the placeholder bound to `x` on the next line is
    # immediately shadowed by the reshape after it, so the model input is the
    # re-colored image rather than a directly fed placeholder.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x = tf.reshape(random_color_set, shape=(-1, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        #'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        #'filename': os.path.split(model_path)[-1],
        'train_dir': save_dir,
        'filename': filename,
        'numColorOutput': numColorOutput
    }
    with sess.as_default():
        # Initialise all variables, falling back to the older API name when
        # global_variables_initializer is not available.
        if hasattr(tf, "global_variables_initializer"):
            tf.global_variables_initializer().run()
        else:
            warnings.warn("Update your copy of tensorflow; future versions of "
                          "CleverHans may drop support for this version.")
            sess.run(tf.initialize_all_variables())

        ################# color training ####################
        print("Trying to load pr model from: " + model_path2)
        # Reuse a saved color model when its .meta file exists; otherwise
        # train one from random intensities below.
        if os.path.exists(model_path2 + ".meta"):
            tf_model_load(sess, model_path2)
            c_w, c_b = sess.run([color_W, color_b])
            print("Load color trained model in training")
        else:
            # Training the color
            for epoch in range(color_training_epochs):
                # Build 100 random intensities and a one-hot label for each.
                outputColorY = []
                p1 = np.random.random(100)
                for i in range(len(p1)):
                    # Collect every category whose interval contains p1[i];
                    # overlapping intervals can yield two candidates.
                    outputOverlapColorY = []
                    for j in range(len(colorCategory)):
                        if p1[i] >= colorCategory[j][0] and p1[
                                i] <= colorCategory[j][1]:
                            colorIndexSeq = []
                            for k in range(len(colorCategory)):
                                if j == k:
                                    colorIndexSeq.append(1)
                                else:
                                    colorIndexSeq.append(0)
                            outputOverlapColorY.append(colorIndexSeq)
                            #break

                    # Randomly choose the output for color Y if the outputOverlapColorY has more than 1 item
                    outputColorY.append(outputOverlapColorY[np.random.randint(
                        0, len(outputOverlapColorY))])

                inputColorX = p1.reshape(100, 1)
                # One gradient-descent step; also fetch the current weights
                # and bias so c_w / c_b hold the latest values after the loop.
                _, c, c_w, c_b = sess.run(
                    [color_optimizer, color_cost, color_W, color_b],
                    feed_dict={
                        color_x: inputColorX,
                        color_y: outputColorY
                    })
                avg_cost = c

                # Evaluating color model
                outputColorY = []
                p1 = np.random.random(100)
                # Generate output for random color inputs (test case)
                for i in range(len(p1)):
                    for j in range(len(colorCategory)):
                        # Unlike the training loop, the list is reset per
                        # category and the loop breaks on the first match, so
                        # the first matching category always wins here.
                        outputOverlapColorY = []
                        if p1[i] >= colorCategory[j][0] and p1[
                                i] <= colorCategory[j][1]:
                            colorIndexSeq = []
                            for k in range(len(colorCategory)):
                                if j == k:
                                    colorIndexSeq.append(1)
                                else:
                                    colorIndexSeq.append(0)
                            outputOverlapColorY.append(colorIndexSeq)
                            break

                    # Randomly choose the output for color Y if the outputOverlapColorY has more than 1 item
                    outputColorY.append(outputOverlapColorY[np.random.randint(
                        0, len(outputOverlapColorY))])

                inputColorX = p1.reshape(100, 1)
                # print(random_xs)
                acc, argmax = sess.run([color_accuracy, color_argmax],
                                       feed_dict={
                                           color_x: inputColorX,
                                           color_y: outputColorY
                                       })
                print("Epoch:", '%04d' % (epoch + 1) + "/" + str(color_training_epochs) + ", Cost= " + \
                      "{:.9f}".format(avg_cost) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc) + " ")
                # print(c_w)

            with tf.device('/CPU:0'):
                # Saves every global variable in the graph, not only the
                # color model's weights.
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=50)
                # Since training PR model is fast, we do not have to save multiple sessions for this
                save_path = os.path.join(save_dir2, filename2)
                saver.save(sess, save_path)
        ##################### end of color training ------------------------------

    ################# model training ####################
    rng = np.random.RandomState([2017, 8, 30])
    # NOTE(review): the first assignment is dead; only 500 takes effect.
    saveFileNum = 50
    saveFileNum = 500
    # saveFileNum = 1000
    # Overwrites the model_path argument with the checkpoint to look for.
    model_path = os.path.join(save_dir, filename + "-" + str(saveFileNum))
    # check if we've trained before, and if we have, use that pre-trained model
    print("Trying to load trained model from: " + model_path)
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
        print("Load trained model")
    else:
        # The trained color weights and the re-coloring graph tensors are
        # handed to the (customised) train function.
        train(sess,
              loss,
              x,
              y,
              x_train,
              y_train,
              args=train_params,
              rng=rng,
              save=True,
              c_w=c_w,
              c_b=c_b,
              pr_model_x=pr_model_x,
              random_color_set=random_color_set,
              pr_model_W=pr_model_W,
              pr_model_b=pr_model_b)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    # NOTE(review): this eval_params is reassigned further down before any
    # use, because the clean evaluation below is commented out.
    eval_params = {'batch_size': batch_size, 'numColorOutput': numColorOutput}
    #accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params,
    #                   pred2=preds, c_w=c_w, c_b=c_b, pr_model_x=pr_model_x, random_color_set=random_color_set,
    #                   pr_model_W=pr_model_W, pr_model_b=pr_model_b)
    #assert x_test.shape[0] == test_end - test_start, x_test.shape
    #print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    #report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        # Index of the first test example of each class.
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            # Repeat each source image once per target class.
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        # Identity matrix: row i is the one-hot label of target class i.
        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            # NOTE(review): the per-class selection is immediately replaced
            # by the whole test set on the next line - confirm intended.
            adv_inputs = x_test[idxs]
            adv_inputs = x_test
        else:
            # NOTE(review): same here, the source_samples slice is discarded
            # in favour of the whole test set.
            adv_inputs = x_test[:source_samples]
            adv_inputs = x_test

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size':
        source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    # Symbolic attack graph, built before the label entry is added to
    # cw_params, so adv2 is generated without y / y_target.
    adv2 = cw.generate(x, **cw_params)
    cw_params[yname] = adv_ys
    # NOTE(review): this None assignment is dead; it is overwritten at once.
    adv = None
    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {
        'batch_size': np.minimum(nb_classes, source_samples),
        'numColorOutput': numColorOutput
    }
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            # NOTE(review): adv was generated from the whole of x_test above
            # but is scored against only the labels at idxs - confirm the
            # sizes are meant to differ.
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv,
                                      y_test[idxs],
                                      args=eval_params)
        else:
            #adv_accuracy = model_eval(sess, x, y, preds, adv, y_test[
            #               :source_samples], args=eval_params)
            # Only this branch passes the color-model tensors and the
            # symbolic adversarial tensor to the (customised) model_eval.
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv,
                                      y_test,
                                      args=eval_params,
                                      pred2=preds,
                                      c_w=c_w,
                                      c_b=c_b,
                                      pr_model_x=pr_model_x,
                                      random_color_set=random_color_set,
                                      pr_model_W=pr_model_W,
                                      pr_model_b=pr_model_b,
                                      is_adv=True,
                                      ae=adv2)

    if viz_enabled:
        # Fill the visualization grid: targeted -> one cell per
        # (source, target) pair; untargeted -> original next to adversarial.
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')
    print("load save file: ", saveFileNum)
    # Compute the number of adversarial examples that were successfully found
    print('Test with adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    # Despite its name, percent_perturbed holds the mean L2 norm of
    # (adv - adv_inputs) per example.
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        # plt itself is not referenced below; the import is kept as-is.
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
def mnist_tutorial_jsma(train_start=0,
                        train_end=60000,
                        test_start=0,
                        test_end=10000,
                        viz_enabled=True,
                        nb_epochs=6,
                        batch_size=128,
                        nb_classes=10,
                        source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)

    Trains a CNN on MNIST, then attacks the first ``source_samples`` test
    images once per non-source target class and reports the success rate
    and the fraction of modified input features.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder (sized from the declared dimensions so the
    # graph agrees with the result arrays allocated further down)
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, 64)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    # (range instead of the Python-2-only xrange)
    for sample_ind in range(source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x_reshape.shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, channels)),
                    np.reshape(adv_x, (img_rows, img_cols, channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm.
    # Average over the attacks actually attempted: the source-class cell of
    # every sample is never written, so a plain mean over the whole matrix
    # would be diluted by those zero entries.
    percent_perturbed = float(np.sum(perturbations)) / nb_targets_tried
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only.
    # Select the successful cells before averaging; multiplying by the mask
    # and taking the mean of the full matrix would also count every failed
    # or never-attempted cell as a zero.
    successful = results == 1
    if successful.any():
        percent_perturb_succ = float(np.mean(perturbations[successful]))
    else:
        percent_perturb_succ = 0.0
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
Exemple #16
0
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack

    Trains a ModelBasicCNN on MNIST (or reloads it when ``model_path + ".meta"``
    exists), crafts adversarial examples with CarliniWagnerL2, prints the
    attack success rate and the mean L2 norm of the perturbations, and
    optionally displays the examples in a grid.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples;
                        when True, one test input per class is attacked and
                        source_samples must equal the number of classes
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: passed to the attack as 'max_iterations'
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    # Everything that needs the session runs inside try/finally so that the
    # session is released even when training or the attack raises.
    try:
        set_log_level(logging.DEBUG)

        # Get MNIST data
        x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)

        # Obtain Image Parameters
        img_rows, img_cols, nchannels = x_train.shape[1:4]
        nb_classes = y_train.shape[1]

        # Define input TF placeholder
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))
        nb_filters = 64

        # Define TF model graph
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=0.1)
        print("Defined TensorFlow model graph.")

        #######################################################################
        # Training the model using TensorFlow
        #######################################################################

        # model_path is "<train_dir>/<filename>"
        train_dir, filename = os.path.split(model_path)
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': train_dir,
            'filename': filename
        }

        rng = np.random.RandomState([2017, 8, 30])
        # check if we've trained before, and if we have, use that
        # pre-trained model
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
        else:
            # Only ask for a checkpoint when the directory it would be
            # written to exists (an empty train_dir means the current
            # directory).
            train(sess,
                  loss,
                  x,
                  y,
                  x_train,
                  y_train,
                  args=train_params,
                  save=os.path.exists(train_dir or os.curdir),
                  rng=rng)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                              args=eval_params)
        assert x_test.shape[0] == test_end - test_start, x_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(
            accuracy))
        report.clean_train_clean_eval = accuracy

        #######################################################################
        # Craft adversarial examples using Carlini and Wagner's approach
        #######################################################################
        nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
        print('Crafting {0} * {1} adversarial examples'.format(
            source_samples, nb_adv_per_sample))
        print("This could take some time ...")

        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, back='tf', sess=sess)

        # Pick the clean inputs to attack together with their true labels
        if viz_enabled:
            assert source_samples == nb_classes
            # First test example of every class, in class order
            test_labels = np.argmax(y_test, axis=1)
            idxs = [
                np.where(test_labels == i)[0][0] for i in range(nb_classes)
            ]
            sources = x_test[idxs]
            true_labels = y_test[idxs]
        else:
            sources = x_test[:source_samples]
            true_labels = y_test[:source_samples]

        if targeted:
            # Row s * nb_classes + t holds source s paired with target t
            adv_inputs = np.repeat(sources, nb_classes,
                                   axis=0).astype(np.float32)
            adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32),
                             (source_samples, 1))
            yname = "y_target"
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
        else:
            adv_inputs = sources
            adv_ys = None
            yname = "y"
            # Two columns: clean input and its adversarial counterpart
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)

        if viz_enabled:
            # Initialize our array for grid visualization
            grid_viz_data = np.zeros(grid_shape, dtype='f')

        cw_params = {
            'binary_search_steps': 1,
            yname: adv_ys,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size':
            source_samples * nb_classes if targeted else source_samples,
            'initial_const': 10
        }

        adv = cw.generate_np(adv_inputs, **cw_params)

        # adv_accuracy is the attack success rate: the fraction classified
        # as the target (targeted) or not as the true label (untargeted).
        eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        if targeted:
            adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys,
                                      args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          true_labels, args=eval_params)

        if viz_enabled:
            for j in range(nb_classes):
                if targeted:
                    for i in range(nb_classes):
                        grid_viz_data[i, j] = adv[i * nb_classes + j]
                else:
                    grid_viz_data[j, 0] = adv_inputs[j]
                    grid_viz_data[j, 1] = adv[j]

            print(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully
        # found
        print('Avg. rate of successful adv. examples {0:.4f}'.format(
            adv_accuracy))
        report.clean_train_adv_eval = 1. - adv_accuracy

        # Compute the average distortion introduced by the algorithm
        mean_l2_norm = np.mean(
            np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_norm))
    finally:
        # Close TF session
        sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
Exemple #17
0
def mnist_tutorial_cw(nb_classes=10, attack_iterations=100, targeted=True):
    """
    Interactively craft one adversarial MNIST image with the CW attack.

    Asks on stdin for a sample class and a target class, restores the model
    checkpoint under ./model, runs mnist_cw_attack on one test image, prints
    the model output for the adversarial image and saves that image to
    SAVE_PATH.

    :param nb_classes: width of the label placeholder
    :param attack_iterations: not used by this function
    :param targeted: not used by this function
    :return: None
    """

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_test, y_test = get_mnist_data()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    # Label placeholder; it is never fed in this function
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = ModelBasicCNN('model1', 10, 64)
    preds = model.get_logits(x)
    print("Defined TensorFlow model graph.")

    ############ Select sample and target class ############
    sample_class = int(input('input sample class(0-9): '))
    target_class = int(input('input target class(0-9): '))

    # No session has been opened yet, so this early exit leaks nothing
    if not (0 <= sample_class <= 9 and 0 <= target_class <= 9):
        print('input is wrong')
        return

    sample_idx = get_mnist_idx(y_test, sample_class)
    target_idx = get_mnist_idx(y_test, target_class)

    # One-element slices keep the leading batch axis
    sample = x_test[sample_idx:sample_idx+1]
    target = y_test[target_idx:target_idx+1]
    ############ ############################## ############

    ##################################
    #          Load Model            #
    ##################################
    # NOTE(review): this Saver is replaced below by the one returned from
    # import_meta_graph; the loading sequence is kept exactly as it was.
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    # Single session for the whole run; the with-block closes it on exit,
    # including when the attack or the restore raises.
    with tf.Session() as sess:
        print("Created TensorFlow session.")
        sess.run(init_op)
        saver = tf.train.import_meta_graph('./model/mnist_model.ckpt.meta')
        current_dir = os.getcwd()
        path = os.path.join(current_dir, 'model/mnist_model.ckpt')
        saver.restore(sess, path)

        #######################################################################
        # Craft adversarial examples using Carlini and Wagner's approach
        #######################################################################
        adv = mnist_cw_attack(sample, target, model, sess)

        # Prediction (preds holds the model logits for x)
        feed_dict = {x: adv}
        probabilities = sess.run(preds, feed_dict)
        print(probabilities)

        # Save adversarial image
        # presumably pixel values are in [0, 1] -- TODO confirm
        two_d_img = (np.reshape(adv, (img_rows, img_cols)) * 255).astype(
            np.uint8)
        from PIL import Image
        save_image = Image.fromarray(two_d_img)
        save_image = save_image.convert('RGB')
        save_image.save(SAVE_PATH)
    return
Exemple #18
0
    pic = io.imread(path + "/" + filename, plugin='matplotlib')
    pic = transform.resize(pic, (img_rows, img_cols), preserve_range=True)
    my_data.append(pic)
# Script fragment: train a ModelBasicCNN, then run FastGradientMethod on the
# images collected in my_data.

# Convert the collected images into a single NumPy array.
my_data = np.array(my_data)

# Entry point for the format of the images to be perturbed: append a trailing
# axis of length 1 (presumably a single grayscale channel -- TODO confirm).
my_data = my_data.reshape(
    (my_data.shape[0], my_data.shape[1], my_data.shape[2], 1))

# Entry point for the training image format: the placeholder through which
# images are fed to the model.
print("STEP 3: Start training model...")
x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))

# config_args, nb_classes and NB_FILTERS are defined outside this fragment.
sess = tf.Session(config=tf.ConfigProto(**config_args))
model = ModelBasicCNN('model1', nb_classes, NB_FILTERS)
preds = model.get_logits(x)
loss = CrossEntropy(model, smoothing=0.1)

# x_train, y_train, train_params and rng come from earlier in the script
# (not shown here); no evaluation callback is passed.
train(sess,
      loss,
      x_train,
      y_train,
      evaluate=None,
      args=train_params,
      rng=rng,
      var_list=model.get_params())

# Build the adversarial version of x with FGSM (fgsm_params is defined outside
# this fragment), then evaluate it on my_data.
fgsm = FastGradientMethod(model, sess=sess)
adv_x = fgsm.generate(x, **fgsm_params)
# Logits of the model on the adversarial input; not used further here.
preds_adv = model.get_logits(adv_x)
adv_image = adv_x.eval(session=sess, feed_dict={x: my_data})
Exemple #19
0
def simulate_jsma():
    """
    Interactively craft one adversarial MNIST image with the JSMA attack.

    Asks on stdin for a sample class and a target class, restores the model
    checkpoint under ./model, runs mnist_jsma_attack on one test image, prints
    the softmax of the model output together with the class the model
    actually predicts, and saves the adversarial image to SAVE_PATH.

    :return: None
    """
    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Get MNIST test data
    x_test, y_test = get_mnist_data()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    # Label placeholder; it is never fed in this function
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = ModelBasicCNN('model1', 10, 64)
    preds = model.get_logits(x)
    print("Defined TensorFlow model graph.")

    ############ Select sample and target class ############
    sample_class = int(input('input sample class(0-9): '))
    target_class = int(input('input target class(0-9): '))

    if not (0 <= sample_class <= 9 and 0 <= target_class <= 9):
        print('input is wrong')
        return

    sample_idx = get_mnist_idx(y_test, sample_class)
    target_idx = get_mnist_idx(y_test, target_class)

    # One-element slices keep the leading batch axis
    sample = x_test[sample_idx:sample_idx + 1]
    target = y_test[target_idx:target_idx + 1]
    ############ ############################## ############

    ##################################
    #          Load Model            #
    ##################################
    # NOTE(review): this Saver is replaced below by the one returned from
    # import_meta_graph; the loading sequence is kept exactly as it was.
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    # The with-block closes the session on exit, so no explicit close()
    # is needed afterwards.
    with tf.Session() as sess:
        sess.run(init_op)
        saver = tf.train.import_meta_graph('./model/mnist_model.ckpt.meta')
        current_dir = os.getcwd()
        path = os.path.join(current_dir, 'model/mnist_model.ckpt')
        saver.restore(sess, path)

        adv_x = mnist_jsma_attack(sample, target, model, sess)

        print('sample class:', np.argmax(y_test[sample_idx]))
        print('target class:', np.argmax(y_test[target_idx]))

        # Get array of output (preds holds the model logits for x)
        logits = sess.run(preds, {x: adv_x})

        print('==========================================')

        # Softmax over the whole output array; subtracting the maximum
        # first keeps np.exp from overflowing.
        exp_logits = np.exp(logits - np.max(logits))
        print(exp_logits / exp_logits.sum())
        print('==========================================')

        # Report the class the model really predicts for the adversarial
        # image, so that a failed attack is not announced as a success.
        predicted_class = int(np.argmax(logits[0]))
        print('{} class is recognized by {} '.format(sample_class,
                                                     predicted_class))

    # save the adversarial image #
    # presumably pixel values are in [0, 1] -- TODO confirm
    two_d_img = (np.reshape(adv_x, (img_rows, img_cols)) * 255).astype(
        np.uint8)
    from PIL import Image
    save_image = Image.fromarray(two_d_img)
    save_image = save_image.convert('RGB')
    save_image.save(SAVE_PATH)