def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test, nb_epochs,
              batch_size, learning_rate):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the ouput placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return:
    """

    # Define TF model graph (for the black-box model)
    if DATASET == "mnist":
        model = MNISTModel(use_log=True).model
    else:
        model = CIFARModel(use_log=True).model
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Train an MNIST model
    if FLAGS.load_pretrain:
        tf_model_load(sess)
    else:
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess,
                    x,
                    y,
                    predictions,
                    X_train,
                    Y_train,
                    verbose=True,
                    save=True,
                    args=train_params)

    # Print out the accuracy on legitimate data
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args=eval_params)
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))

    return model, predictions, accuracy
def checkpoint_load(sess, checkpoint_dir, moving_variables=None):
	ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
	if ckpt and ckpt.model_checkpoint_path:
		if moving_variables:
			variable_averages = tf.train.ExponentialMovingAverage(0.9)
			variables_to_restore = variable_averages.variables_to_restore(moving_variables)
			saver = tf.train.Saver(variables_to_restore)
			saver.restore(sess, ckpt.model_checkpoint_path)
			print(sess.run(moving_variables))
		tf_model_load(sess, ckpt.model_checkpoint_path)
		return True

	print('restore fails: please provide correct checkpoint directory')
	return False
Ejemplo n.º 3
0
def checkpoint_load(sess, checkpoint_dir, moving_variables=None):
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        if moving_variables:
            variable_averages = tf.train.ExponentialMovingAverage(0.9)
            variables_to_restore = variable_averages.variables_to_restore(
                moving_variables)
            saver = tf.train.Saver(variables_to_restore)
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(sess.run(moving_variables))
        tf_model_load(sess, ckpt.model_checkpoint_path)
        return True

    print('restore fails: please provide correct checkpoint directory')
    return False
Ejemplo n.º 4
0
def __test():
    # report = AccuracyReport()
    tf.set_random_seed(1234)
    sess = tf.Session()
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")
    # Train an MNIST model
    train_params = {
        'nb_epochs': NB_EPOCHS,
        'batch_size': BATCH_SIZE,
        'learning_rate': LEARNING_RATE,
        'filename': os.path.split(MODEL_PATH)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': BATCH_SIZE}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
Ejemplo n.º 5
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     model_path=MODEL_PATH,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """
  CIFAR10 cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        print('start')
        #model = CNN('model1', nb_classes, isL2 = True)
        model = make_wresnet(scope='model1')
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)
        tf_model_load(
            sess,
            '/nfs/nas4/data-hanwei/data-hanwei/DATA/models/wresnet/cifar1')

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        optimizer = tf.train.MomentumOptimizer(learning_rate=0.0008,
                                               momentum=0)
        #optimizer = tf.train.MomentumOptimizer(learning_rate=0.0008,momentum=0.9)
        #optimizer = tf.train.MomentumOptimizer(learning_rate=0.001,momentum=0.9)
        train(sess,
              x,
              y,
              model,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params(),
              optimizer=optimizer)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
Ejemplo n.º 6
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=False, nb_epochs=6,
                      batch_size=128, nb_classes=10, source_samples=1,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    #tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.log_device_placement=True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")
    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy
    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)
    fgsm = FastGradientMethod(model, sess=sess)

    result = np.zeros((5,len(X_test)))
    strength = np.zeros((3,len(X_test)))

    adv_ys = None
    yname = "y"

    cw_params = {'binary_search_steps': 1,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': source_samples,
                 'initial_const': 10}
    fgsm_eps = [0.1,0.3, 0.5]
    for j in fgsm_eps:
        fgsm_params = {'eps': j,
                   'clip_min': 0.,
                   'clip_max': 1.}  
                
        for i in range(len(X_test)):
            feed_dict = {x: X_test[i].reshape((1,28,28,1))}
            Classes0 = preds.eval(feed_dict=feed_dict,session=sess)
            Class0 = np.argmax(Classes0)
            result[0,i] = Class0
            adv_inputs = X_test[i]
            adv_inputs = adv_inputs.reshape((1,28,28,1))
            #adv = cw.generate_np(adv_inputs,**cw_params)
            
            adv = fgsm.generate_np(adv_inputs, **fgsm_params)
            pdb.set_trace()
	    feed_dict = {x: adv}
            Classes1 = preds.eval(feed_dict=feed_dict,session=sess)
            Class1 = np.argmax(Classes1)
            result[1,i] = Class1
            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                           axis=(1, 2, 3))**.5)
            strength[0,i] = percent_perturbed

            adv2 = cw.generate_np(adv,**cw_params)
            feed_dict = {x: adv2}
            Classes2 = preds.eval(feed_dict=feed_dict,session=sess)
            Class2 = np.argmax(Classes2)
            result[2,i] = Class2
            # Compute the average distortion introduced by the algorithm
            percent_perturbed2 = np.mean(np.sum((adv2 - adv)**2,
                                           axis=(1, 2, 3))**.5)
            strength[1,i] = percent_perturbed2
            
            adv_f = sig.medfilt(adv,(1,3,3,1))
            feed_dict = {x: adv_f}
            Classes1 = preds.eval(feed_dict=feed_dict,session=sess)
            Class1 = np.argmax(Classes1)
            result[3,i] = Class1
            # Compute the average distortion introduced by the algorithm
            #percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
            #                               axis=(1, 2, 3))**.5)
            #strength[0,i] = percent_perturbed

            adv2_f = cw.generate_np(adv_f,**cw_params)
            feed_dict = {x: adv2_f}
            Classes2 = preds.eval(feed_dict=feed_dict,session=sess)
            Class2 = np.argmax(Classes2)
            result[4,i] = Class2
            # Compute the average distortion introduced by the algorithm
            percent_perturbed2 = np.mean(np.sum((adv2_f - adv_f)**2,
                                           axis=(1, 2, 3))**.5)
            strength[2,i] = percent_perturbed2
            if i%100 == 0:
                print(i)
       # exit()
    # Close TF session
    sess.close()
    sio.savemat('fgsm_mnist.mat',{'adv_01':adv_01,'adv_03':adv_03, 'adv_05':adv_05 'strength':strength})
Ejemplo n.º 7
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params,
              save=os.path.exists("models"), rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]],
                dtype=np.float32)
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes for
                 instance in x_test[:source_samples]], dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {'binary_search_steps': 1,
                 yname: adv_ys,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': source_samples * nb_classes if
                 targeted else source_samples,
                 'initial_const': 10}

    adv = cw.generate_np(adv_inputs,
                         **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(
            sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, y_test[
                           idxs], args=eval_params)
        else:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, y_test[
                           :source_samples], args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Ejemplo n.º 8
0
def mnist_tutorial_cw(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    viz_enabled=VIZ_ENABLED,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    source_samples=SOURCE_SAMPLES,
    learning_rate=LEARNING_RATE,
    attack_iterations=ATTACK_ITERATIONS,
    model_path=MODEL_PATH,
    targeted=TARGETED,
):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN("model1", nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "filename": os.path.split(model_path)[-1],
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {"batch_size": batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print("Test accuracy on legitimate test examples: {0}".format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else "1"
    print(
        "Crafting "
        + str(source_samples)
        + " * "
        + nb_adv_per_sample
        + " adversarial examples"
    )
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype="f")

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]], dtype=np.float32
            )
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[:source_samples]],
                dtype=np.float32,
            )

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels)
        )
        adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
            (source_samples * nb_classes, nb_classes)
        )
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype="f")

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    if targeted:
        cw_params_batch_size = source_samples * nb_classes
    else:
        cw_params_batch_size = source_samples
    cw_params = {
        "binary_search_steps": 1,
        yname: adv_ys,
        "max_iterations": attack_iterations,
        "learning_rate": CW_LEARNING_RATE,
        "batch_size": cw_params_batch_size,
        "initial_const": 10,
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {"batch_size": np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
            adv_accuracy = 1 - err
        else:
            err = model_eval(
                sess, x, y, preds, adv, y_test[:source_samples], args=eval_params
            )
            adv_accuracy = 1 - err

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print("--------------------------------------")

    # Compute the number of adversarial examples that were successfully found
    print("Avg. rate of successful adv. examples {0:.4f}".format(adv_accuracy))
    report.clean_train_adv_eval = 1.0 - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs) ** 2, axis=(1, 2, 3)) ** 0.5)
    print("Avg. L_2 norm of perturbations {0:.4f}".format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
def mnist_tutorial_jsma(train_start=0,
                        train_end=60000,
                        test_start=0,
                        test_end=10000,
                        viz_enabled=True,
                        nb_epochs=6,
                        batch_size=128,
                        nb_classes=10,
                        source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(4254264)

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    # X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
    #                                               train_end=train_end,
    #                                               test_start=test_start,
    #                                               test_end=test_end)

    # Get notMNIST data
    # with np.load("notmnist.npz") as data:
    #     X_train, Y_train, X_test, Y_test = data['examples_train'], data['labels_train'], data['examples_test'], data['labels_test']

    # Get MNISTnotMNIST data
    with np.load("mnist.npz") as data:
        X_train, Y_train, X_test, Y_test = data['X_train'], data[
            'Y_train'], data['X_test'], data['Y_test']
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    # Define TF model graph
    model_path = "./"
    model_name = "clean_trained_mnist_model"
    model = make_basic_cnn(nb_classes=nb_classes)
    if tf_model_load(sess, file_path=os.path.join(model_path, model_name)):
        print(model_name, " reloaded.")
    preds = model.get_probs(x)
    # print('shape is', preds.get_shape())

    # clean_train = True
    # if clean_train:
    #     train_params = {
    #         'nb_epochs': nb_epochs,
    #         'batch_size': batch_size,
    #         'learning_rate': learning_rate
    #     }
    #     model_path = "./"
    #     model_name = "clean_trained__model_notmnist"
    #     rng = np.random.RandomState([1989, 12, 13])
    #     model = make_basic_cnn()
    #     preds = model.get_probs(x)
    #
    #     def evaluate():
    #         # Evaluate the accuracy of the MNIST model on legitimate test
    #         # examples
    #         eval_params = {'batch_size': batch_size}
    #         acc = model_eval(
    #             sess, x, y, preds, X_test, Y_test, args=eval_params)
    #         report.clean_train_clean_eval = acc
    #         assert X_test.shape[0] == test_end - test_start, X_test.shape
    #         print('Test accuracy on legitimate examples: %0.4f' % acc)
    #     model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,args=train_params, rng=rng)
    #
    #     save_path = os.path.join(model_path, model_name)
    #     saver = tf.train.Saver()
    #     saver.save(sess, save_path)
    #     _logger.info("Completed model training and saved at: " + str(save_path))
    # print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    # train_params = {
    #     'nb_epochs': nb_epochs,
    #     'batch_size': batch_size,
    #     'learning_rate': learning_rate,
    #     'train_dir': model_path,
    #     'filename': model_name
    # }
    # sess.run(tf.global_variables_initializer())
    # rng = np.random.RandomState([2017, 8, 30])
    # model_train(sess, x, y, preds, X_train, Y_train, save=True, args=train_params,
    #             rng=rng)
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    # report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')
    # misclassify
    results2 = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    # grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    # grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': 1,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    rng = np.random.RandomState([1358, 23, 234])
    index_shuf = list(range(len(X_test)))
    rng.shuffle(index_shuf)
    X_test = X_test[index_shuf]
    Y_test = Y_test[index_shuf]

    # create a dictionary to keep track of occurence of each letter
    # create a 2D array to kee track of successful attacks
    occurence = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0}
    # 10:0, 11:0, 12:0, 13:0, 14:0, 15:0, 16:0, 17:0, 18:0, 19:0}
    rate_table = np.zeros((nb_classes, nb_classes), dtype='f')

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # add one to current class occurence
        occurence[current_class] += 1

        # For the grid visualization, keep original images along the diagonal
        # grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        #     sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)
            # misclassify
            res2 = int(model_argmax(sess, x, preds, adv_x) != current_class)
            # if success, add one to successful rate table
            if res == 1:
                rate_table[current_class, target] += 1.

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            # if viz_enabled:
            #     figure = pair_visual(
            #         np.reshape(sample, (img_rows, img_cols)),
            #         np.reshape(adv_x, (img_rows, img_cols)), figure)

            # Add our adversarial example to our grid data
            # grid_viz_data[target, current_class, :, :, :] = np.reshape(
            #     adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            results2[target, sample_ind] = res2
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Close TF session
    sess.close()

    # Compute success rate of each letter attacking each target
    for cur in range(nb_classes):
        if occurence[cur] != 0:
            rate_table[cur, :] /= float(occurence[cur])
    print("The table of rate of successful attacking is shown below")
    print(rate_table)
    print("the number of occurrence of each class is ", occurence)

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    # misclassify
    succ_rate2 = float(np.sum(results2)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    print(
        'Avg. rate of misclassified adv. examples {0:.4f}'.format(succ_rate2))
    # report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Finally, block & display a grid of all the adversarial examples
    # if viz_enabled:
    #     import matplotlib.pyplot as plt
    #     plt.close(figure)
    #     _ = grid_visual(grid_viz_data)

    return report
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL,
                     ckpt_dir='None',
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')

    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        if x_train.shape[1] == 3:
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
        if len(y_train.shape) == 1:
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        # ord: ,
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  #1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }

    # Madry dosen't divide by 255
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train), np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))

    print(
        'clip_min : {}, clip_max : {}  >> CHECK WITH WHICH VALUES THE CLASSIFIER WAS PRETRAINED !!! <<'
        .format(pgd_params['clip_min'], pgd_params['clip_max']))

    rng = np.random.RandomState()  # [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                predictor=None,
                x_adv=None):
        if predictor is None:
            acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        else:
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()
            # if is_adv:
            # import IPython ; IPython.embed() ; exit(1)
            if FLAGS.save_debug_dict:
                debug_dict['x_set'] = x_set
                debug_dict['y_set'] = y_set
                ddfn = 'logs/debug_dict_{}.pkl'.format(
                    'adv' if is_adv else 'clean')
                if not os.path.exists(ddfn):
                    with open(ddfn, 'wb') as f:
                        pickle.dump(debug_dict, f)
                debug_dict.clear()
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
            if is_adv is not None:
                label = 'test_acc_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(label, acc)
                if predictor is not None:
                    detect = np.equal(p_det, is_adv).mean()
                    label = 'test_det_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    print(label, detect)
                    swriter.add_scalar(label, detect)
                    label = 'test_dac_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    swriter.add_scalar(
                        label,
                        np.equal(p_set,
                                 y_set[:len(p_set)].argmax(-1))[np.equal(
                                     p_det, is_adv)].mean())

        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1',
                                          nb_classes,
                                          nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            if ckpt_dir is not 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

            saver = tf.train.Saver(var_list=dict(
                (v.name.split('/', 1)[1].split(':')[0], v)
                for v in tf.global_variables()))
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

            initialize_uninitialized_global_variables(sess)

        else:

            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

            train(sess,
                  loss,
                  None,
                  None,
                  dataset_train=dataset_train,
                  dataset_size=dataset_size,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params())

        logits_op = preds.op
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect],
                    y_train[:FLAGS.n_collect],
                    x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:

                def _predictor():
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
            next(predictor)
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds,
                                x_test,
                                y_test,
                                'clean_train_clean_eval',
                                False,
                                predictor=predictor)

        # Initialize the PGD attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps

            def _attack_mean(x):
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            adv_x = tf.reduce_mean(adv_x, 1)

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            for ds, y, name in ((x_train, y_train, 'train'), (x_test, y_test,
                                                              'test')):
                train_batches = math.ceil(len(ds) / FLAGS.batch_size)
                train_pgd = np.concatenate([
                    sess.run(adv_x, {
                        x:
                        ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size]
                    }) for b in tqdm.trange(train_batches)
                ])
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), y)
                train_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), train_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv,
                              x_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor,
                              x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds,
                              pgd_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')

    exit(0)
    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2',
                                       nb_classes,
                                       nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        model = ResNet()
    else:
        raise Exception('Specify valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir is not 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

        assert ckpt_path and tf_model_load(
            sess, file_path=ckpt_path), '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

        initialize_uninitialized_global_variables(sess)

    else:

        def evaluate2():
            # Accuracy of adversarially trained model on legitimate test inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess,
              loss2,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate2,
              args=train_params,
              rng=rng,
              var_list=model2.get_params())

    # Evaluate model
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
Ejemplo n.º 11
0
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=False,
                      nb_epochs=6,
                      batch_size=128,
                      nb_classes=10,
                      source_samples=1,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()
    file = read_mat_file(filename)
    label = file["label"]
    data = file["data"]
    #data[data>1]= 1
    #data[data<0]= 0
    adv_data = data[10000:80000, :, :, :]
    cw = adv_data[0::7, :, :, :]
    fgsm01 = adv_data[1::7, :, :, :]
    fgsm03 = adv_data[2::7, :, :, :]
    fgsm05 = adv_data[3::7, :, :, :]
    gaussian01 = adv_data[4::7, :, :, :]
    gaussian03 = adv_data[5::7, :, :, :]
    gaussian05 = adv_data[6::7, :, :, :]

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    #tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.log_device_placement=True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")
    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    args=train_params,
                    save=os.path.exists("models"),
                    rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    pdb.set_trace()
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy
Ejemplo n.º 12
0
def mnist_tutorial_deepfool(train_start=0, train_end=60000, #读60000训练
                            test_start=0,test_end=10000, #读10000测试
                            viz_enabled=True, nb_epochs=6,
                            batch_size=128, nb_classes=2, source_samples=10,
                            learning_rate=0.001, attack_iterations=100,
                            model_path=os.path.join("models", "mnist")):
    """
    MNIST tutorial for Deepfool's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples激活对抗例子
    :param nb_epochs: number of epochs to train model(一个epoch指代所有的数据送入网络中完成一次前向计算及反向传播的过程。)
    :param batch_size: size of training batches
    :param nb_classes: number of output classes(输出几类)
    :param source_samples: number of test inputs to attack(测试输入用于攻击的数量)
    :param learning_rate: learning rate for training(学习率)
    :param model_path: path to the model file(文件路径)
    :param attack_iterations: 攻击迭代次数
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies精确度报告
    report = AccuracyReport()

    # MNIST-specific dimensions图像尺寸28*28*1
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_picklable_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow(构建训练模型)
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 8, 9])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path+".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)
        print("save success")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a DeepFool attack object
    deepfool = DeepFool(model, back='tf', sess=sess)


    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][1] for i in range(10)]
    print("idxs:",idxs)

    # construct adv_inputs
    grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    print("grid_viz_data",grid_viz_data.shape)
    adv_inputs = X_test[idxs].reshape([-1,28,28,1])

    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': attack_iterations,
                       'nb_classes': 10,
                       'clip_min': 0.,
                       'clip_max': 1.}

    adv = deepfool.generate_np(adv_inputs, **deepfool_params)

    print("adv success")

    adv_accuracy = 1-model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                args={'batch_size': 10})

    for j in range(10):
        grid_viz_data[j, 0] = adv_inputs[j]
        grid_viz_data[j, 1] = adv[j]

    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1.-adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Ejemplo n.º 13
0
def main(argv=None):
  
  from cleverhans_tutorials import check_installation
  check_installation(__file__)
  
  if not os.path.exists( CONFIG.SAVE_PATH ):
    os.makedirs( CONFIG.SAVE_PATH )
  save_path_data = CONFIG.SAVE_PATH + 'data/'
  if not os.path.exists( save_path_data ):
    os.makedirs( save_path_data )
  model_path = CONFIG.SAVE_PATH + '../all/' +  CONFIG.DATASET + '/'
  if not os.path.exists( model_path ):
    os.makedirs( model_path )
    os.makedirs( model_path + 'data/' )
  
  nb_epochs = FLAGS.nb_epochs
  batch_size = FLAGS.batch_size
  learning_rate = FLAGS.learning_rate
  nb_filters = FLAGS.nb_filters
  len_x = int(CONFIG.NUM_TEST/2)
  
  start = time.time()

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set seeds to improve reproducibility
  if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
    tf.set_random_seed(1234)
    np.random.seed(1234)
    rd.seed(1234)
  elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
    tf.set_random_seed(13)
    np.random.seed(1234)
    rd.seed(0)          
  
  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
  tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2 
  sess = tf.Session(config=tf_config)   
  
  if CONFIG.DATASET == 'mnist':
    # Get MNIST data
    mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
  elif CONFIG.DATASET == 'cifar10':
    # Get CIFAR10 data
    data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')                             
  elif CONFIG.DATASET == 'moon':
    # Create a two moon example
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)                          
    x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                   y_train1, x_test1,
                                                                   y_test1)
  elif CONFIG.DATASET == 'dims':
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)                          
    x_train2, y_train, x_test2, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                     y_train1,x_test1,
                                                                     y_test1)
    x_train, x_test = add_noise_and_QR(x_train2, x_test2, CONFIG.NUM_DIMS)

  np.save(os.path.join(save_path_data, 'x_test'), x_test)
  np.save(os.path.join(save_path_data, 'y_test'), y_test)

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': 1}
  rng = np.random.RandomState([2017, 8, 30])
  
  with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

    def do_eval(adv_x, preds, x_set, y_set, report_key):
      acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x, nb_classes, x_set,
                                          y_set, args=eval_params)
      setattr(report, report_key, acc)
      if report_key:
        print('Accuracy on %s examples: %0.4f' % (report_key, acc), file=fi)
      return pred_np, adv_x_np
    
    if CONFIG.DATASET == 'mnist':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelBasicCNN('model1', nb_classes, nb_filters)
    elif CONFIG.DATASET == 'cifar10':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[32, 32, 3])
    elif CONFIG.DATASET == 'moon':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelMLP('model1', nb_classes)
    elif CONFIG.DATASET == 'dims':
      trained_model_path = save_path_data + 'trained_model'
      model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)
      
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    
    def evaluate():
      _, _ = do_eval(x, preds, x_test, y_test, 'test during train')
    
    if os.path.isfile( trained_model_path + '.index' ):
      tf_model_load(sess, trained_model_path)
    else:
      if CONFIG.DATASET == 'mnist':
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'cifar10':
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())
      elif CONFIG.DATASET == 'moon':
        train_2d(sess, loss, x, y, x_train, y_train, save=False, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'dims':
        train_2d(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      saver = tf.train.Saver()
      saver.save(sess, trained_model_path)
    
    # Evaluate the accuracy on test examples
    if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
      logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
    else:
      _, _ = do_eval(x, preds, x_train, y_train, 'train')
      logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
      np.save(os.path.join(save_path_data, 'logits_zero_attacked'), logits_0) 
    
    if CONFIG.DATASET == 'moon':
      num_grid_points = 5000
      if os.path.isfile( model_path + 'data/images_mesh' + str(num_grid_points) + '.npy' ):
        x_mesh = np.load(model_path + 'data/images_mesh' + str(num_grid_points) + '.npy')
        logits_mesh = np.load(model_path + 'data/logits_mesh' + str(num_grid_points) + '.npy')
      else:
        xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points), np.linspace(0, 1, num_grid_points)) 
        x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
        y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
        x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(model_path, x_mesh1, y_mesh1)
        logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
        x_mesh = np.squeeze(x_mesh)
        np.save(os.path.join(model_path, 'data/images_mesh'+str(num_grid_points)), x_mesh)
        np.save(os.path.join(model_path, 'data/logits_mesh'+str(num_grid_points)), logits_mesh)
        
    points_x = x_test[:len_x]
    points_y = y_test[:len_x]
    points_x_bar = x_test[len_x:]
    points_y_bar = y_test[len_x:] 
     
    # Initialize the CW attack object and graph
    cw = CarliniWagnerL2(model, sess=sess) 
    
    # first attack
    attack_params = {
        'learning_rate': CONFIG.CW_LEARNING_RATE,
        'max_iterations': CONFIG.CW_MAX_ITERATIONS
      }
    
    if CONFIG.DATASET == 'moon':
     
      out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
      attack_params['const_a_min'] = out_a
      attack_params['const_a_max'] = 100
    
    adv_x = cw.generate(x, **attack_params) 
      
    if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
      adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
      logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
    else:
      #Evaluate the accuracy on adversarial examples
      preds_adv = model.get_logits(adv_x)
      logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar, points_y_bar,
                                    'test once attacked')
      np.save(os.path.join(save_path_data, 'images_once_attacked'), adv_img_1)
      np.save(os.path.join(save_path_data, 'logits_once_attacked'), logits_1)
      
    # counter attack 
    attack_params['max_iterations'] = 1024
      
    if CONFIG.DATASET == 'moon':  
      
      out_alpha2 = compute_epsilons_balls_alpha(x_mesh, np.squeeze(x_test),
                                                np.squeeze(adv_img_1), model_path,
                                                CONFIG.SAVE_PATH)
      attack_params['learning_rate'] = out_alpha2
      attack_params['const_a_min'] = -1
      attack_params['max_iterations'] = 2048
      
      plot_data(np.squeeze(adv_img_1), logits_1, CONFIG.SAVE_PATH+'data_pred1.png', x_mesh,
                logits_mesh)
      
    adv_adv_x = cw.generate(x, **attack_params) 
      
    x_k = np.concatenate((points_x, adv_img_1), axis=0)
    y_k = np.concatenate((points_y, logits_1), axis=0)
    
    if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
      adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
      logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
    else:
      # Evaluate the accuracy on adversarial examples
      preds_adv_adv = model.get_logits(adv_adv_x)
      logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                    'test twice attacked')   
      
      np.save(os.path.join(save_path_data, 'images_twice_attacked'), adv_img_2)
      np.save(os.path.join(save_path_data, 'logits_twice_attacked'), logits_2)
    
    if CONFIG.DATASET == 'moon':  
      plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
      plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
      test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0, logits_1, logits_2,
                 CONFIG.SAVE_PATH)
 
  compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:], logits_0[:len_x],
                    logits_2[:len_x], CONFIG.SAVE_PATH) 
  
  if x_test.shape[-1] > 1:
    num_axis=(1,2,3)
  else:
    num_axis=(1,2)
    
  D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]), axis=num_axis)))
  D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1-adv_img_2[len_x:]),
                                    axis=num_axis)))
  D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1, logits_2[len_x:],
                                logits_0[:len_x], logits_2[:len_x])
      
  if D_p_mod != [] and D_p_p_mod != []:
    plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
      
  plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)
  
  print('Time needed:', time.time()-start)

  return report
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    binary = True if FLAGS.binary else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations

    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, binary, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            model = make_scaled_binary_cnn(phase, 'bin_', input_shape=(
                None, img_rows, img_cols, channels), nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase, 'bin_', input_shape=(
                None, img_rows, img_cols, channels), nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(phase, 'fp_', input_shape=(
            None, img_rows, img_cols, channels), nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(
            sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an CIFAR10 model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv:
        from cleverhans.attacks import FastGradientMethod
        fgsm = FastGradientMethod(model, back='tf', sess=sess)
        fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
        adv_x_train = fgsm.generate(x, phase, **fgsm_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                    evaluate=evaluate, args=train_params, save=save, rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        predictions_adv=preds_adv, evaluate=evaluate, args=train_params,
                        save=save, rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase,
                          feed={phase: False}, args=eval_params)

    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows, img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(
            nb_samples * (nb_classes - 1), a_max=MAX_BATCH_SIZE, a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': att_batch_size,
                         'initial_const': 10,
                         }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': att_batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, true_labels,
                                  phase=phase, args=eval_params)
    else:
        assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating un-targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, Y_test,
                                  phase=phase, args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((X_test_adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()

    '''
    print("Repeating the process, using adversarial training")

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              phase=phase,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, preds_adv, X_test,
                                  Y_test, phase=phase, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    train_params.update({'reuse_global_step': True})
    model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                predictions_adv=preds_adv, evaluate=evaluate_2,
                args=train_params)
    '''
    '''
Ejemplo n.º 15
0
def main(argv=None):
    tf.set_random_seed(1234)
    sess = tf.Session()
    keras.backend.set_session(sess)

    X_train, Y_train, X_test, Y_test = data_lmrc()
    Y_train = Y_train.clip(.1 / 9., 1. - .1)

    x = tf.placeholder(tf.float32, shape=(None, 128, 128, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model = cnn_model(img_rows=128, img_cols=128, channels=3)
    predictions = model(x)

    def evaluate():
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'train_dir': FLAGS.train_dir,
        'filename': FLAGS.filename
    }

    model_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess,
                    x,
                    y,
                    predictions,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    save=True)

    wrap = KerasModelWrapper(model)

    nb_classes = 10
    targeted = False
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'

    n_adv = 1000
    cw = CarliniWagnerL2(model, back='tf', sess=sess)
    adv_inputs = X_test[:n_adv]
    adv_ys = None
    yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': 10,
        'initial_const': 10,
    }
    adv = cw.generate_np(adv_inputs, **cw_params)

    #nFeatures = np.ndarray(shape=(n_adv,3))
    #for i in range(0,n_adv):
    #    img = adv_inputs[i]
    #    fast = cv2.FastFeatureDetector_create()
    #    kp = fast.detect(img,None)
    #    nFeatures[i][0] = len(kp)
    #    img = adv[i]
    #    fast = cv2.FastFeatureDetector_create()
    #    kp = fast.detect(img,None)
    #    nFeatures[i][1] = len(kp)
    #    nFeatures[i][2] = int(np.argmax(Y_test[i]))

    #print('Format: Mean(Std)')
    #for i in range(0,10):
    #    pdb.set_trace()
    #    rows = np.where(nFeatures[:,2] == i)
    #    data = np.delete(nFeatures[rows], 2, 1)
    #    mean = np.mean(data, 0)
    #    std = np.std(data, 0)
    #    print('Class{0} : Original Image {1:.2f}({2:.2f}) Adversarial Image {3:.2f}({4:.2f})'.format(i, mean[0], std[0], mean[1], std[1]))

    adv_den = np.zeros(adv.shape)
    for i in range(0, n_adv):
        img = adv[i] * 255
        img = img.astype('uint8')
        dst = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
        dst = dst.astype('float32')
        dst /= 255
        adv_den[i] = dst

    eval_params = {'batch_size': np.minimum(nb_classes, 10)}
    original_accuracy = model_eval(sess,
                                   x,
                                   y,
                                   predictions,
                                   adv_inputs,
                                   Y_test[:n_adv],
                                   args=eval_params)
    adv_accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              adv,
                              Y_test[:n_adv],
                              args=eval_params)
    print('Accuracy on original images {0:.4f}'.format(original_accuracy))
    print('Accuracy on adversarial images {0:.4f}'.format(adv_accuracy))

    adv_accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              adv_den,
                              Y_test[:n_adv],
                              args=eval_params)
    print(
        'Accuracy on denoised adversarial images {0:.4f}'.format(adv_accuracy))

    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations adversarial samples {0:.4f}'.format(
        percent_perturbed))
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of denoised perturbations {0:.4f}'.format(
        percent_perturbed))

    sess.close()
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      nb_classes=10,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model.get_probs(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    args=train_params,
                    save=os.path.exists("models"),
                    rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    s = []
    for i in range(0, len(X_test), 1):
        pred = sess.run(preds, {x: X_test[i:i + 1]})
        print(pred)
        print(Y_test[i:i + 1])
        s.append(np.sort(pred)[0, -1] - np.sort(pred)[0, -2])

    #Draw a histogram
    def draw_hist(myList, Title, Xlabel, Ylabel):
        plt.hist(myList,
                 np.arange(0, 1, 0.01),
                 normed=True,
                 stacked=True,
                 facecolor='blue')
        plt.xlabel(Xlabel)
        plt.ylabel(Ylabel)
        plt.title(Title)
        plt.show()

    draw_hist(myList=s,
              Title='legitimate',
              Xlabel='difference between max and second largest',
              Ylabel='Probability')

    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in X_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in X_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, 1))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size':
        source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)
    preds_adv = model.get_probs(adv)
    pred = sess.run(preds_adv, {x: adv_inputs})
    '''
    s = []
    for i in range(0,len(adv_inputs),1):
        print(pred[i])
        s.append((np.sort(pred[i])[-1])-(np.sort(pred[i])[-2]))
    
   #Draw a histogram
    def draw_hist(myList,Title,Xlabel,Ylabel):
         plt.hist(myList,np.arange(0,1,0.01),normed=True,stacked=True,facecolor='red')
         plt.xlabel(Xlabel)       
         plt.ylabel(Ylabel)
         plt.title(Title)
         plt.show()
    draw_hist(myList=s,Title='adversarial',Xlabel='difference between max and second largest',
               Ylabel='Probability')
    '''

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, Y_test[
                           idxs], args=eval_params)
        else:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, Y_test[
                           :source_samples], args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    return report
def mnist_attack(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 viz_enabled=True,
                 nb_epochs=6,
                 batch_size=128,
                 nb_filters=64,
                 nb_samples=10,
                 learning_rate=0.001,
                 eps=0.3,
                 attack=0,
                 attack_iterations=100,
                 model_path=None,
                 targeted=False,
                 binary=False,
                 scale=False,
                 rand=False,
                 debug=None,
                 test=False,
                 data_dir=None,
                 delay=0,
                 adv=0,
                 nb_iter=40):
    """
    MNIST tutorial for generic attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1237)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    if debug:
        set_log_level(logging.DEBUG)
    else:
        set_log_level(logging.WARNING)  # for running on sharcnet

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(datadir=data_dir,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name='phase')

    # for attempting to break unscaled network.
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    save = False
    train_from_scratch = False
    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    # Define TF model graph
    if binary:
        print('binary=True')
        if scale:
            print('scale=True')
            if rand:
                print('rand=True')
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
                model = make_scaled_binary_rand_cnn(
                    phase,
                    logits_scalar,
                    'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
            else:
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
                model = make_scaled_binary_cnn(phase,
                                               logits_scalar,
                                               'binsc_',
                                               input_shape=(None, img_rows,
                                                            img_cols,
                                                            channels),
                                               nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bin_',
                                          nb_filters=nb_filters)
    else:
        if rand:
            print('rand=True')
            from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn
            model = make_scaled_rand_cnn(phase,
                                         logits_scalar,
                                         'fp_rand',
                                         nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_cnn
            model = make_basic_cnn(phase,
                                   logits_scalar,
                                   'fp_',
                                   nb_filters=nb_filters)

    preds = model(x, reuse=False)  # * logits_scalar
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    rng = np.random.RandomState([2017, 8, 30])

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    def evaluate():
        # Evaluate the accuracy of the MNIST model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

        if adv != 0:
            # Accuracy of the adversarially trained model on adversarial
            # examples
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % acc)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params,
                             feed={logits_scalar: ATTACK_T})
            print('Test accuracy on adversarial examples (scaled): %0.4f' %
                  acc)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if test:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
        else:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            if test:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            evaluate=evaluate,
                            args=train_params,
                            save=save,
                            rng=rng)
            else:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            args=train_params,
                            save=save,
                            rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          preds,
                          X_test,
                          Y_test,
                          phase=phase,
                          feed={phase: False},
                          args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Build dataset
    ###########################################################################
    if viz_enabled:
        assert nb_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        viz_rows = nb_classes if targeted else 2
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        if viz_enabled:
            from cleverhans.utils import grid_visual
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, idxs, nb_classes, img_rows, img_cols, channels)
        else:
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
                img_cols, channels)
    else:
        if viz_enabled:
            from cleverhans.utils import pair_visual
            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        print('Attack: CarliniWagnerL2')
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        print('Attack: SaliencyMapMethod')
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        print('Attack: FastGradientMethod')
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        print('Attack: MadryEtAl')
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    adv_np = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    name = 'm_fgsm_eps%s_n%s.npy' % (eps, nb_samples)
    fpath = os.path.join(
        '/scratch/gallowaa/mnist/adversarial_examples/cleverhans/', name)
    np.savez(fpath, x=adv_np, y=Y_test[:nb_samples])
    '''
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    adv_np, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                         phase: False}, args=eval_params)
    '''
    eval_params = {'batch_size': att_batch_size}
    if targeted:
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_np,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)

    else:
        print("Evaluating untargeted results")
        if viz_enabled:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[idxs],
                                      phase=phase,
                                      args=eval_params)
        else:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[:nb_samples],
                                      phase=phase,
                                      args=eval_params)

    if viz_enabled:
        n = nb_classes - 1
        for i in range(nb_classes):
            if targeted:
                for j in range(nb_classes):
                    if i != j:
                        if j != 0 and i != n:
                            grid_viz_data[i, j] = adv_np[j * n + i]
                        if j == 0 and i > 0 or i == n and j > 0:
                            grid_viz_data[i, j] = adv_np[j * n + i - 1]
                    else:
                        grid_viz_data[i, j] = adv_inputs[j * n]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv_np[j]
        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Compute number of modified features (L_0 norm)
    nb_changed = np.where(adv_np != adv_inputs)[0].shape[0]
    percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0])

    # Compute the average distortion introduced by the algorithm
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturb))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f}'.format(accuracy))
    print('{0:.4f}'.format(adv_accuracy))
    print('{0:.4f}'.format(percent_perturbed))
    print('{0:.4f}'.format(percent_perturb))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Ejemplo n.º 18
0
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter

    #### EMPIR extra flags
    lowprecision = FLAGS.lowprecision
    abits = FLAGS.abits
    wbits = FLAGS.wbits
    abitsList = FLAGS.abitsList
    wbitsList = FLAGS.wbitsList
    stocRound = True if FLAGS.stocRound else False
    rand = FLAGS.rand
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree = True if FLAGS.ensembleThree else False
    abits2 = FLAGS.abits2
    wbits2 = FLAGS.wbits2
    abits2List = FLAGS.abits2List
    wbits2List = FLAGS.wbits2List
    inpgradreg = True if FLAGS.inpgradreg else False
    distill = True if FLAGS.distill else False
    student_epochs = FLAGS.student_epochs
    l2dbl = FLAGS.l2dbl
    l2cs = FLAGS.l2cs
    ####

    save = False
    train_from_scratch = False

    if ensembleThree:
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, batch_size,
                                                   nb_filters, learning_rate,
                                                   nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if ensembleThree:
        if (wbitsList is None) or (
                abitsList is None
        ):  # Layer wise separate quantization not specified for first model
            if (wbits == 0) or (abits == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers for the first model have to specified using wbits1 and abits1 flags"
                )
                sys.exit(1)
            else:
                fixedPrec1 = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers of the first model"
            )
            sys.exit(1)
        else:
            fixedPrec1 = 0

        if (wbits2List is None) or (
                abits2List is None
        ):  # Layer wise separate quantization not specified for second model
            if (wbits2 == 0) or (abits2 == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers for the second model have to specified using wbits1 and abits1 flags"
                )
                sys.exit(1)
            else:
                fixedPrec2 = 1
        elif (len(wbits2List) != 3) or (len(abits2List) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers of the second model"
            )
            sys.exit(1)
        else:
            fixedPrec2 = 0

        if (fixedPrec2 != 1) or (
                fixedPrec1 != 1
        ):  # Atleast one of the models have separate precisions per layer
            fixedPrec = 0
            print("Within atleast one model has separate precisions")
            if (fixedPrec1 == 1):  # first layer has fixed precision
                abitsList = (abits, abits, abits)
                wbitsList = (wbits, wbits, wbits)
            if (fixedPrec2 == 1):  # second layer has fixed precision
                abits2List = (abits2, abits2, abits2)
                wbits2List = (wbits2, wbits2, wbits2)
        else:
            fixedPrec = 1

        if (train_from_scratch):
            print("The ensemble model cannot be trained from scratch")
            sys.exit(1)
        if fixedPrec == 1:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn
            model = make_ensemble_three_cifar_cnn(phase,
                                                  logits_scalar,
                                                  'lp1_',
                                                  'lp2_',
                                                  'fp_',
                                                  wbits,
                                                  abits,
                                                  wbits2,
                                                  abits2,
                                                  input_shape=(None, img_rows,
                                                               img_cols,
                                                               channels),
                                                  nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn_layerwise
            model = make_ensemble_three_cifar_cnn_layerwise(
                phase,
                logits_scalar,
                'lp1_',
                'lp2_',
                'fp_',
                wbitsList,
                abitsList,
                wbits2List,
                abits2List,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
    elif lowprecision:
        if (wbitsList is None) or (
                abitsList is
                None):  # Layer wise separate quantization not specified
            if (wbits == 0) or (abits == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers have to specified using wbits and abits flags"
                )
                sys.exit(1)
            else:
                fixedPrec = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers"
            )
            sys.exit(1)
        else:
            fixedPrec = 0

        if fixedPrec:
            from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_cifar_cnn
            model = make_basic_lowprecision_cifar_cnn(
                phase,
                logits_scalar,
                'lp_',
                wbits,
                abits,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters,
                stocRound=stocRound)
        else:
            from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_cifar_cnn
            model = make_layerwise_lowprecision_cifar_cnn(
                phase,
                logits_scalar,
                'lp_',
                wbitsList,
                abitsList,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters,
                stocRound=stocRound)
    elif distill:
        from cleverhans_tutorials.tutorial_models import make_distilled_cifar_cnn
        model = make_distilled_cifar_cnn(phase,
                                         logits_scalar,
                                         'teacher_fp_',
                                         'fp_',
                                         nb_filters=nb_filters,
                                         input_shape=(None, img_rows, img_cols,
                                                      channels))
    ####
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cifar_cnn
        model = make_basic_cifar_cnn(phase,
                                     logits_scalar,
                                     'fp_',
                                     input_shape=(None, img_rows, img_cols,
                                                  channels),
                                     nb_filters=nb_filters)

    # separate predictions of teacher for distilled training
    if distill:
        teacher_preds = model.teacher_call(x, reuse=False)
        teacher_logits = model.get_teacher_logits(x, reuse=False)

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:
        ##default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble(sess,
                                      x,
                                      y,
                                      preds,
                                      X_test,
                                      Y_test,
                                      phase=phase,
                                      args=eval_params)
        else:
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if distill:
            temperature = 10  # 1 means the teacher predictions are used as it is
            teacher_scaled_preds_val = model_train_teacher(sess,
                                                           x,
                                                           y,
                                                           teacher_preds,
                                                           teacher_logits,
                                                           temperature,
                                                           X_train,
                                                           Y_train,
                                                           phase=phase,
                                                           args=train_params,
                                                           rng=rng)
            eval_params = {'batch_size': batch_size}
            teacher_acc = model_eval(sess,
                                     x,
                                     y,
                                     teacher_preds,
                                     X_test,
                                     Y_test,
                                     phase=phase,
                                     args=eval_params)
            print(
                'Test accuracy of the teacher model on legitimate examples: %0.4f'
                % teacher_acc)
            print('Training the student model...')
            student_train_params = {
                'nb_epochs': student_epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'loss_name': 'train loss',
                'filename': 'model',
                'reuse_global_step': False,
                'train_scope': 'train',
                'is_training': True
            }
            if save:
                student_train_params.update({'log_dir': model_path})
            y_teacher = tf.placeholder(tf.float32, shape=(None, nb_classes))
            model_train_student(sess,
                                x,
                                y,
                                preds,
                                temperature,
                                X_train,
                                Y_train,
                                y_teacher=y_teacher,
                                teacher_preds=teacher_scaled_preds_val,
                                alpha=0.3,
                                beta=0.7,
                                phase=phase,
                                evaluate=evaluate,
                                args=student_train_params,
                                save=save,
                                rng=rng)
        elif inpgradreg:
            model_train_inpgrad_reg(sess,
                                    x,
                                    y,
                                    preds,
                                    X_train,
                                    Y_train,
                                    phase=phase,
                                    evaluate=evaluate,
                                    l2dbl=l2dbl,
                                    l2cs=l2cs,
                                    args=train_params,
                                    save=save,
                                    rng=rng)
        else:
            # do clean training for 'nb_epochs' or 'delay' epochs
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        predictions_adv=preds_adv_train,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)

    else:
        if ensembleThree:
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            stored_variables = [
                'lp_conv1_init/k', 'lp_conv2_init/k', 'lp_conv3_init/k',
                'lp_ip1init/W', 'lp_logits_init/W'
            ]
            variable_dict = dict(zip(stored_variables, variables[:5]))
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            variable_dict = dict(zip(stored_variables, variables[5:10]))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            stored_variables = [
                'fp_conv1_init/k', 'fp_conv2_init/k', 'fp_conv3_init/k',
                'fp_ip1init/W', 'fp_logits_init/W'
            ]
            variable_dict = dict(zip(stored_variables, variables[10:]))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
        else:
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble(sess,
                                       x,
                                       y,
                                       preds,
                                       X_test,
                                       Y_test,
                                       phase=phase,
                                       feed={phase: False},
                                       args=eval_params)
    else:
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds,
                              X_test,
                              Y_test,
                              phase=phase,
                              feed={phase: False},
                              args=eval_params)

    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
            img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model,
                                   back='tf',
                                   model_type=model_type,
                                   num_classes=nb_classes,
                                   sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model,
                                     back='tf',
                                     model_type=model_type,
                                     sess=sess,
                                     num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model,
                                      back='tf',
                                      model_type=model_type,
                                      sess=sess,
                                      num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model,
                             back='tf',
                             model_type=model_type,
                             sess=sess,
                             num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model,
                                        back='tf',
                                        sess=sess,
                                        model_type=model_type,
                                        num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': att_batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  X_test_adv,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)
    else:
        # assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating un-targeted results")
        if ensembleThree:
            adv_accuracy = model_eval_ensemble(sess,
                                               x,
                                               y,
                                               preds,
                                               X_test_adv,
                                               Y_test,
                                               phase=phase,
                                               args=eval_params)
        else:  #default below
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      X_test_adv,
                                      Y_test,
                                      phase=phase,
                                      args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((X_test_adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()
    '''
    print("Repeating the process, using adversarial training")

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              phase=phase,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, preds_adv, X_test,
                                  Y_test, phase=phase, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    train_params.update({'reuse_global_step': True})
    model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                predictions_adv=preds_adv, evaluate=evaluate_2,
                args=train_params)
    '''
    '''
Ejemplo n.º 19
0
 def restore(self, path):
     """
     Wrapper around cleverhans utils tf.model_load
     """
     return tf_model_load(self.session, path)
Ejemplo n.º 20
0
def mnist_tutorial_fgsm(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED,
                      noise_output=NOISE_OUTPUT):
  """
  MNIST tutorial for Fast Gradient Method's attack
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  nb_filters = 64

  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }

  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  if os.path.exists(model_path + ".meta"):
    tf_model_load(sess, model_path)
  else:
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, model_path)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using Carlini and Wagner's approach
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a FGSM attack object
  fgsm = FastGradientMethod(model, sess=sess)

  if viz_enabled:
    assert source_samples == nb_classes
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
  if targeted:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, 1, img_rows, img_cols,
                    nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = np.array(
          [[instance] * nb_classes for instance in x_test[idxs]],
          dtype=np.float32)
    else:
      adv_inputs = np.array(
          [[instance] * nb_classes for
           instance in x_test[:source_samples]], dtype=np.float32)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_inputs = adv_inputs.reshape(
        (source_samples * nb_classes, img_rows, img_cols, nchannels))
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
    yname = "y_target"
  else:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = x_test[idxs]
    else:
      adv_inputs = x_test[:source_samples]

    adv_ys = None
    yname = "y"

  if targeted:
    fgsm_params_batch_size = source_samples * nb_classes
  else:
    fgsm_params_batch_size = source_samples
  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  adv = fgsm.generate_np(adv_inputs,
                       **fgsm_params)

  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))


  ###########################################################################
  # Adversarial Training
  ###########################################################################

  model2 = ModelBasicCNN('model2', nb_classes, nb_filters) 
  
  fgsm2 = FastGradientMethod(model2, sess=sess)

  def attack_fgsm(x):
    return fgsm2.generate(adv_inputs, **fgsm_params)

  preds2 = model2.get_logits(x)
  loss2 = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)

  train(sess, loss2, x_train, y_train, args=train_params, rng=rng)
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on adversarial fgsm test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy
  print("Defined TensorFlow model graph.")

  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  # Close TF session
  sess.close()
  def save_visual(data, path):
    """
    Modified version of cleverhans.plot.pyplot
    """
    figure = plt.figure()
    # figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = data.shape[0]
    num_rows = data.shape[1]
    num_channels = data.shape[4]
    for y in range(num_rows):
      for x in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (x + 1) + (y * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(data[x, y, :, :, 0], cmap='gray')
        else:
          plt.imshow(data[x, y, :, :, :])

    # Draw the plot and return
    plt.savefig(path)
    return figure

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    # _ = grid_visual(grid_viz_data)
    # cleverhans_image.save("output", grid_viz_data)
    if noise_output:
      image_name = "output/fgsm_mnist_noise.png"
    else:
      image_name = "output/fgsm_mnist.png"
    _ = save_visual(grid_viz_data, image_name)

  return report
Ejemplo n.º 21
0
def main(argv=None):
    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter
    
    #### EMPIR extra flags
    lowprecision=FLAGS.lowprecision
    abits=FLAGS.abits
    wbits=FLAGS.wbits
    abitsList=FLAGS.abitsList
    wbitsList=FLAGS.wbitsList
    stocRound=True if FLAGS.stocRound else False
    rand=FLAGS.rand 
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree=True if FLAGS.ensembleThree else False
    abits2=FLAGS.abits2
    wbits2=FLAGS.wbits2
    abits2List=FLAGS.abits2List
    wbits2List=FLAGS.wbits2List
    ####
   
    save = False
    train_from_scratch = False

    #### Imagenet flags
    imagenet_path = FLAGS.imagenet_path
    if imagenet_path is None:
        print("Error: Imagenet data path not specified")
        sys.exit(1)

    # Imagenet specific dimensions
    img_rows = _DEFAULT_IMAGE_SIZE
    img_cols = _DEFAULT_IMAGE_SIZE
    channels = _NUM_CHANNELS
    nb_classes = _NUM_CLASSES

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)
    
    # Get imagenet datasets
    train_dataset, test_dataset = data_imagenet(nb_epochs, batch_size, imagenet_path)

    # Creating a initializable iterators
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    # Getting next elements from the iterators
    next_test_element = test_iterator.get_next()
    next_train_element = train_iterator.get_next()
    
    train_x, train_y = train_iterator.get_next()
    test_x, test_y = test_iterator.get_next()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name="phase")

    logits_scalar = tf.placeholder_with_default(
        INIT_T, shape=(), name="logits_temperature")
   
    if ensembleThree: 
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given
    
    if ensembleThree: 
       if (wbitsList is None) or (abitsList is None): # Layer wise separate quantization not specified for first model
           if (wbits==0) or (abits==0):
               print("Error: the number of bits for constant precision weights and activations across layers for the first model have to specified using wbits1 and abits1 flags")
               sys.exit(1)
           else:
               fixedPrec1 = 1
       elif (len(wbitsList) != 6) or (len(abitsList) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer of the first model")  
           sys.exit(1)
       else: 
           fixedPrec1 = 0
       
       if (wbits2List is None) or (abits2List is None): # Layer wise separate quantization not specified for second model
           if (wbits2==0) or (abits2==0):
               print("Error: the number of bits for constant precision weights and activations across layers for the second model have to specified using wbits1 and abits1 flags")
               sys.exit(1)
           else:
               fixedPrec2 = 1
       elif (len(wbits2List) != 6) or (len(abits2List) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer of the second model")  
           sys.exit(1)
       else: 
           fixedPrec2 = 0

       if (fixedPrec2 != 1) or (fixedPrec1 != 1): # Atleast one of the models have separate precisions per layer
           fixedPrec=0
           print("Within atleast one model has separate precisions")
           if (fixedPrec1 == 1): # first layer has fixed precision
               abitsList = (abits, abits, abits, abits, abits, abits)
               wbitsList = (wbits, wbits, wbits, wbits, wbits, wbits)
           if (fixedPrec2 == 1): # second layer has fixed precision
               abits2List = (abits2, abits2, abits2, abits2, abits2, abits2)
               wbits2List = (wbits2, wbits2, wbits2, wbits2, wbits2, wbits2)
       else:
           fixedPrec=1
       
       if (train_from_scratch):
           print ("The ensemble model cannot be trained from scratch")
           sys.exit(1)
       if fixedPrec == 1:
           from cleverhans_tutorials.tutorial_models import make_ensemble_three_alexnet
           model = make_ensemble_three_alexnet(
               phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbits, abits, wbits2, abits2, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
       else:
           from cleverhans_tutorials.tutorial_models import make_layerwise_three_combined_alexnet
           model = make_layerwise_three_combined_alexnet(
               phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbitsList, abitsList, wbits2List, abits2List, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
    elif lowprecision:
       if (wbitsList is None) or (abitsList is None): # Layer wise separate quantization not specified
           if (wbits==0) or (abits==0):
               print("Error: the number of bits for constant precision weights and activations across layers have to specified using wbits and abits flags")
               sys.exit(1)
           else:
               fixedPrec = 1
       elif (len(wbitsList) != 6) or (len(abitsList) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer")  
           sys.exit(1)
       else: 
           fixedPrec = 0
       
       if fixedPrec:
           
           ### For training from scratch
           from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_alexnet
           model = make_basic_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbits, abits, input_shape=(
            None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes)  
       else:
           from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_alexnet
           model = make_layerwise_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbitsList, abitsList, 
            input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
    else:
        ### For training from scratch
        from cleverhans_tutorials.tutorial_models import make_basic_alexnet_from_scratch
        model = make_basic_alexnet_from_scratch(phase, logits_scalar, 'fp_', input_shape=(
        None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:
    ##default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params)
        else: #default below
            acc = model_eval_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an Imagenet model
    train_params = {
        'lowprecision': lowprecision,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {'eps': MAX_EPS, 'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {'eps': tf.abs(tf.truncated_normal(
                shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar
   #  if adv:
   #      from cleverhans.attacks import FastGradientMethod
   #      fgsm = FastGradientMethod(model, back='tf', sess=sess)
   #      fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
   #      adv_x_train = fgsm.generate(x, phase, **fgsm_params)
   #      preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})
        
        # do clean training for 'nb_epochs' or 'delay' epochs with learning rate reducing with time
        model_train_imagenet2(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase,
                evaluate=evaluate, args=train_params, save=save, rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train_imagenet(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase,
                    predictions_adv=preds_adv_train, evaluate=evaluate, args=train_params, save=save, rng=rng)
    else:
        if ensembleThree: ## ensembleThree models have to loaded from different paths
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            # First 11 variables from path1
            stored_variables = ['lp_conv1_init/k', 'lp_conv1_init/b', 'lp_conv2_init/k', 'lp_conv3_init/k', 'lp_conv4_init/k', 'lp_conv5_init/k', 'lp_ip1init/W', 'lp_ip1init/b', 'lp_ip2init/W', 'lp_logits_init/W', 'lp_logits_init/b']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[:11]))) # only dict was messing with the order
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            # Second 11 variables from path2
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[11:22])))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Third 11 variables from path3
            stored_variables = ['fp_conv1_init/k', 'fp_conv1_init/b', 'fp_conv2_init/k', 'fp_conv3_init/k', 'fp_conv4_init/k', 'fp_conv5_init/k', 'fp_ip1init/W', 'fp_ip1init/b', 'fp_ip2init/W', 'fp_logits_init/W', 'fp_logits_init/b']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[22:33])))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
            # Next 24 batch norm variables from path1
            stored_variables = ['lp__batchNorm1/batch_normalization/gamma', 'lp__batchNorm1/batch_normalization/beta', 'lp__batchNorm1/batch_normalization/moving_mean', 'lp__batchNorm1/batch_normalization/moving_variance', 'lp__batchNorm2/batch_normalization/gamma', 'lp__batchNorm2/batch_normalization/beta', 'lp__batchNorm2/batch_normalization/moving_mean', 'lp__batchNorm2/batch_normalization/moving_variance', 'lp__batchNorm3/batch_normalization/gamma', 'lp__batchNorm3/batch_normalization/beta', 'lp__batchNorm3/batch_normalization/moving_mean', 'lp__batchNorm3/batch_normalization/moving_variance', 'lp__batchNorm4/batch_normalization/gamma', 'lp__batchNorm4/batch_normalization/beta', 'lp__batchNorm4/batch_normalization/moving_mean', 'lp__batchNorm4/batch_normalization/moving_variance', 'lp__batchNorm5/batch_normalization/gamma', 'lp__batchNorm5/batch_normalization/beta', 'lp__batchNorm5/batch_normalization/moving_mean', 'lp__batchNorm5/batch_normalization/moving_variance', 'lp__batchNorm6/batch_normalization/gamma', 'lp__batchNorm6/batch_normalization/beta', 'lp__batchNorm6/batch_normalization/moving_mean', 'lp__batchNorm6/batch_normalization/moving_variance']

            variable_dict = dict(OrderedDict(zip(stored_variables, variables[33:57])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Next 24 batch norm variables from path2
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[57:81])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Final 24 batch norm variables from path1
            stored_variables = ['fp__batchNorm1/batch_normalization/gamma', 'fp__batchNorm1/batch_normalization/beta', 'fp__batchNorm1/batch_normalization/moving_mean', 'fp__batchNorm1/batch_normalization/moving_variance', 'fp__batchNorm2/batch_normalization/gamma', 'fp__batchNorm2/batch_normalization/beta', 'fp__batchNorm2/batch_normalization/moving_mean', 'fp__batchNorm2/batch_normalization/moving_variance', 'fp__batchNorm3/batch_normalization/gamma', 'fp__batchNorm3/batch_normalization/beta', 'fp__batchNorm3/batch_normalization/moving_mean', 'fp__batchNorm3/batch_normalization/moving_variance', 'fp__batchNorm4/batch_normalization/gamma', 'fp__batchNorm4/batch_normalization/beta', 'fp__batchNorm4/batch_normalization/moving_mean', 'fp__batchNorm4/batch_normalization/moving_variance', 'fp__batchNorm5/batch_normalization/gamma', 'fp__batchNorm5/batch_normalization/beta', 'fp__batchNorm5/batch_normalization/moving_mean', 'fp__batchNorm5/batch_normalization/moving_variance', 'fp__batchNorm6/batch_normalization/gamma', 'fp__batchNorm6/batch_normalization/beta', 'fp__batchNorm6/batch_normalization/moving_mean', 'fp__batchNorm6/batch_normalization/moving_variance']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[81:105])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path3))
        else: # restoring the model trained using this setup, not a downloaded one
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
            # evaluate()


    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params)
    else: #default below
        accuracy = model_eval_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    adv_inputs = test_x #adversarial inputs can be generated from any of the test examples 

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    nb_adv_per_sample = 1
    adv_ys = None
    yname = "y"

    print('Crafting adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': batch_size,
                         'initial_const': 10,
                         }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({'clip_min': -2.2, 'clip_max': 2.7}) # Since max and min for imagenet turns out to be around -2.11 and 2.12
    eval_params = {'batch_size': batch_size}
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    print("Evaluating un-targeted results")
    if ensembleThree:
        adv_accuracy = model_eval_ensemble_adv_imagenet(sess, x, y, preds, test_iterator, 
                        test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params)
    else:
        adv_accuracy = model_eval_adv_imagenet(sess, x, y, preds, test_iterator, 
                        test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params)
    
    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))


    # Close TF session
    sess.close()
Ejemplo n.º 22
0
                X_train,
                Y_train,
                evaluate=evaluate,
                args=train_params,
                rng=rng,
                save=True)

    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_par)
    print('Train accuracy on legitimate examples: %0.4f\n' % acc)

    acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_par)
    print('Test accuracy on legitimate examples: %0.4f\n' % acc)

if not TRAIN:
    tf_model_load(sess, cache_dir + "/" + MODEL_NAME)

fgsm_params = {'eps': 0.25, 'clip_min': 0., 'clip_max': 1.}

fgsm = FastGradientMethod(model, sess=sess)
adversarial_sample = fgsm.generate(x, **fgsm_params)

# Predictions and class of the adversarial examples.
adversarial_preds = model.get_probs(adversarial_sample)
adversarial_class = tf.argmax(adversarial_preds, axis=-1)

X_adv = np.ndarray(shape=X_test.shape)
Y_adv = np.copy(Y_test)

X_slic = np.ndarray(shape=X_test.shape)
Y_slic = np.copy(Y_test)
def prep_bbox(sess,
              logits_scalar,
              x,
              y,
              X_train,
              Y_train,
              X_test,
              Y_test,
              img_rows,
              img_cols,
              channels,
              nb_epochs,
              batch_size,
              learning_rate,
              rng,
              phase=None,
              binary=False,
              scale=False,
              nb_filters=64,
              model_path=None,
              adv=0,
              delay=0,
              eps=0.3):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the ouput placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :return:
    """

    # Define TF model graph (for the black-box model)
    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            #from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            # model = make_scaled_binary_cnn(phase, 'bb_binsc_', input_shape=(
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
            model = make_scaled_binary_rand_cnn(phase,
                                                logits_scalar,
                                                'bb_binsc_',
                                                input_shape=(None, img_rows,
                                                             img_cols,
                                                             channels),
                                                nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bb_bin_',
                                          input_shape=(None, img_rows,
                                                       img_cols, channels),
                                          nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(phase,
                               logits_scalar,
                               'bb_fp_',
                               input_shape=(None, img_rows, img_cols,
                                            channels),
                               nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Print out the accuracy on legitimate data
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        print('Test accuracy of black-box on legitimate test '
              'examples: %.4f' % acc)

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'bb train loss',
        'filename': 'bb_model',
        'train_scope': 'bb_model',
        'reuse_global_step': False,
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            nb_iter = 20
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        if adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    phase=phase,
                    evaluate=evaluate,
                    args=train_params,
                    save=save,
                    rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        predictions_adv=preds_adv,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)

    accuracy = evaluate()

    return model, preds, accuracy, model_path
Ejemplo n.º 24
0
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess,
              loss,
              x,
              y,
              x_train,
              y_train,
              args=train_params,
              save=os.path.exists("models"),
              rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size':
        source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, y_test[
                           idxs], args=eval_params)
        else:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, y_test[
                           :source_samples], args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Ejemplo n.º 25
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, nb_classes=10, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Disable Keras learning phase since we will be serving through tensorflow
    keras.layers.core.K.set_learning_phase(0)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
              "to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path+".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"))

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    wrap = KerasModelWrapper(model)
    cw = CarliniWagnerL2(wrap, back='tf', sess=sess)

    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0] for i in range(10)]
    if targeted:
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        one_hot = np.zeros((10, 10))
        one_hot[np.arange(10), np.arange(10)] = 1

        adv_inputs = np.array([[instance] * 10 for instance in X_test[idxs]],
                              dtype=np.float32)
        adv_inputs = adv_inputs.reshape((100, 28, 28, 1))
        adv_ys = np.array([one_hot] * 10, dtype=np.float32).reshape((100, 10))
        yname = "y_target"
    else:
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        adv_inputs = X_test[idxs]
        adv_ys = None
        yname = "y"

    cw_params = {'binary_search_steps': 1,
                 yname: adv_ys,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': 100 if targeted else 10,
                 'initial_const': 10}

    adv = cw.generate_np(adv_inputs,
                         **cw_params)

    if targeted:
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys,
                                  args={'batch_size': 10})
    else:
        adv_accuracy = 1-model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                    args={'batch_size': 10})

    for j in range(10):
        if targeted:
            for i in range(10):
                grid_viz_data[i, j] = adv[i * 10 + j]
        else:
            grid_viz_data[j, 0] = adv_inputs[j]
            grid_viz_data[j, 1] = adv[j]

    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1.-adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Ejemplo n.º 26
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()
    model_path = "./"
    model_name = "clean_trained__model_notmnist"
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(7895)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    # X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
    #                                               train_end=train_end,
    #                                               test_start=test_start,
    #                                               test_end=test_end)
    # Get notMNIST data
    with np.load("notmnist.npz") as data:
        X_train, Y_train, X_test, Y_test = data['examples_train'], data[
            'labels_train'], data['examples_test'], data['labels_test']

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    # Define TF model graph
    model = make_basic_cnn()

    # Create TF session
    sess = tf.Session()

    if tf_model_load(sess, file_path=os.path.join(model_path, model_name)):
        print(model_name, " reloaded.")
    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_probs(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    return report
Ejemplo n.º 27
0
def main(argv=None):
    tf.set_random_seed(1234)
    sess = tf.Session()
    keras.backend.set_session(sess)

    X_train, Y_train, X_test, Y_test = data_cifar10()
    Y_train = Y_train.clip(.1 / 9., 1. - .1)

    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model = cnn_model(img_rows=32, img_cols=32, channels=3)
    predictions = model(x)

    def evaluate():
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'train_dir': FLAGS.train_dir,
        'filename': FLAGS.filename
    }

    model_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess,
                    x,
                    y,
                    predictions,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    save=True)

    wrap = KerasModelWrapper(model)

    nb_classes = 10
    targeted = False
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'

    cw = CarliniWagnerL2(model, back='tf', sess=sess)
    adv_inputs = X_test[:10]
    adv_ys = None
    yname = "y"
    edges = edgeDetect(adv_inputs)
    print('Length of edges of adv samples: ')
    print(edges[:10])

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': 10,
        'initial_const': 10,
    }

    adv = cw.generate_np(adv_inputs, **cw_params)
    edges = edgeDetect(adv)
    print('Length of edges of adv samples: ')
    print(edges[:10])

    sigma = 16.0 / 255
    gamma = 0.00061 * 255 * 255
    alpha = 0.00061 * 255 * 255

    n_clusters = 10
    n_samples = 50

    noise = np.random.normal(0.0, sigma, adv.shape)
    adv_gauss = adv + noise

    i1 = np.repeat(np.arange(0, 10), n_samples)
    i2 = np.random.randint(32, size=10 * n_samples)
    i3 = np.random.randint(32, size=10 * n_samples)

    sample = adv[i1, i2, i3]
    noise = np.random.normal(0.0, sigma, sample.shape)
    noisy_samples = sample + noise
    noisy_samples = np.reshape(noisy_samples, (10, n_samples, 3))

    noise = np.random.normal(0.0, sigma, adv.shape)

    adv_rdesc = np.zeros(adv.shape)
    adv_rmix = np.zeros(adv.shape)

    for img_no, img_samples in enumerate(noisy_samples):

        clusters = np.zeros((n_clusters, 3))
        clusters[0] = img_samples[0]

        for c_j in range(1, n_clusters):

            prob_cj = np.zeros(n_samples)

            for pix_no, pix in enumerate(img_samples):

                l2_min = 100000
                for c_l in range(0, c_j):
                    l2_norm_sq = np.inner(pix - clusters[c_l],
                                          pix - clusters[c_l])
                    if l2_norm_sq < l2_min:
                        l2_min = l2_norm_sq

                prob_cj[pix_no] = math.exp(gamma * l2_min)

            prob_cj /= prob_cj.sum()
            clusters[c_j] = img_samples[np.random.choice(n_samples,
                                                         1,
                                                         p=prob_cj)]

        for pix_i in range(0, 32):
            for pix_j in range(0, 32):
                c_dist_min = 100000
                c_min = np.zeros(3)
                c_sum = np.zeros(3)
                weight_sum = 0
                for c_j in clusters:
                    c_dist = np.linalg.norm(adv_gauss[img_no][pix_i][pix_j] -
                                            c_j)
                    weight_j = math.exp(-1 * alpha * c_dist * c_dist)
                    weight_sum = weight_sum + weight_j
                    c_sum = c_sum + weight_j * c_j
                    if c_dist < c_dist_min:
                        c_dist_min = c_dist
                        c_min = c_j

                adv_rdesc[img_no][pix_i][pix_j] = c_min
                adv_rmix[img_no][pix_i][pix_j] = c_sum / weight_sum

    eval_params = {'batch_size': np.minimum(nb_classes, 10)}

    adv_accuracy = 1 - model_eval(
        sess, x, y, predictions, adv, Y_test[:10], args=eval_params)

    print('Avg. rate of successful adv. examples without noise {0:.4f}'.format(
        adv_accuracy))

    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations without noise {0:.4f}'.format(
        percent_perturbed))

    adv_accuracy = 1 - model_eval(
        sess, x, y, predictions, adv_gauss, Y_test[:10], args=eval_params)

    print('Avg. rate of successful adv. examples with Gaussian noise {0:.4f}'.
          format(adv_accuracy))

    percent_perturbed = np.mean(
        np.sum((adv_gauss - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations with Gaussian noise {0:.4f}'.format(
        percent_perturbed))

    adv_accuracy = 1 - model_eval(
        sess, x, y, predictions, adv_rdesc, Y_test[:10], args=eval_params)

    print('Avg. rate of successful adv. examples with random descent {0:.4f}'.
          format(adv_accuracy))

    percent_perturbed = np.mean(
        np.sum((adv_rdesc - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations with random descent {0:.4f}'.format(
        percent_perturbed))

    adv_accuracy = 1 - model_eval(
        sess, x, y, predictions, adv_rmix, Y_test[:10], args=eval_params)

    print('Avg. rate of successful adv. examples with random mixture {0:.4f}'.
          format(adv_accuracy))

    percent_perturbed = np.mean(
        np.sum((adv_rmix - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations with random mixture {0:.4f}'.format(
        percent_perturbed))

    sess.close()
Ejemplo n.º 28
0
def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Prepare graph
    x_input = tf.placeholder(tf.float32, shape=batch_shape)
    y_label = tf.placeholder(tf.int32, shape=(FLAGS.batch_size, ))
    y_hot = tf.one_hot(y_label, num_classes)

    model = InceptionModel(num_classes)
    preds = model(x_input)
    logits = model.get_logits(x_input)
    acc = _top_1_accuracy(logits, y_label)
    tf_model_load(sess, FLAGS.checkpoint_path)

    attack = KKTFun5(model, sess=sess)
    eps = FLAGS.eps
    alp = FLAGS.alp
    params = {
        'eps': 0.3,
        'alp': 1.0,
        'ord': 2,
        'nb_iter': eps,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_x, log_step, log_suc = attack.generate(x_input, y_label, **params)
    adv_image = np.zeros((1000, 299, 299, 3))
    l2_norm = np.zeros((1000))
    acc_ori = np.zeros((1000))
    acc_val = np.zeros((1000))
    pred_score = np.zeros((1000, 1001))
    pred_score_adv = np.zeros((1000, 1001))
    name = []
    b_i = 0

    begin = time.time()
    for images, _, labels, filenames in load_images(FLAGS.input_dir,
                                                    FLAGS.input_dir,
                                                    FLAGS.metadata_file_path,
                                                    batch_shape):
        bb_i = b_i + FLAGS.batch_size
        y_labels = np.zeros((FLAGS.batch_size, num_classes))
        for i_y in range(FLAGS.batch_size):
            y_labels[i_y][labels[i_y]] = 1
        x_adv = sess.run(adv_x, feed_dict={x_input: images, y_label: labels})
        #acc_val[b_i:bb_i] = sess.run(log_suc,feed_dict={x_input:images,y_label:labels})
        #l2_norm[b_i:bb_i] = sess.run(log_step,feed_dict={x_input:images,y_label:labels})
        #pdb.set_trace()
        #acc_ori[b_i] = sess.run(acc,feed_dict={x_input:images,y_label:labels})
        #l2_norm[b_i] = np.mean(np.sum((images- x_adv)**2,axis=(1,2,3))**.5)
        adv_image[b_i:bb_i] = x_adv
        acc_ori[b_i:bb_i] = sess.run(acc,
                                     feed_dict={
                                         x_input: images,
                                         y_label: labels
                                     })
        acc_val[b_i:bb_i] = sess.run(acc,
                                     feed_dict={
                                         x_input: x_adv,
                                         y_label: labels
                                     })
        pred_score[b_i:bb_i] = sess.run(preds,
                                        feed_dict={
                                            x_input: images,
                                            y_label: labels
                                        })
        pred_score_adv[b_i:bb_i] = sess.run(preds,
                                            feed_dict={
                                                x_input: x_adv,
                                                y_label: labels
                                            })
        l2_norm[b_i:bb_i] = np.sum((x_adv - images)**2, axis=(1, 2, 3))**.5
        name.append(filenames)
        b_i = bb_i
Ejemplo n.º 29
0
    #exit()

    # restore from pretrained
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    sess.run(tf.global_variables_initializer())

    using_aug = True
    #ckpt_path = '../tfmodels/cifar10_simple_model_epoch50'
    ckpt_path = '../tfmodels/cifar10_resnet_model_epoch200'
    if using_aug:
        print('Using model trained by augmented data.')
        ckpt_path += '_aug'
    tf_model_load(sess, ckpt_path)

    # prepare data
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    x_filtered, y_filtered, _ = filter_data(sess, x, y, model, x_test, y_test)

    # make sure the model load properly by running it against the test set
    accuracy = validate_model(sess, x, y, model, x_filtered, y_filtered)
    print('Base accuracy of the target model on legitimate images: ' + str(accuracy))

    # initiate attack
    target = None
    attack_method = 'basic_iterative'
    recover_method = 'middleground_flip'
    recover_params = {'eps': 8./255,
                      'eps_iter': 1./255,   # only used when ord=np.inf
Ejemplo n.º 30
0
def mnist_tutorial_adv_train(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000,
                             viz_enabled=VIZ_ENABLED,
                             nb_epochs=NB_EPOCHS,
                             batch_size=BATCH_SIZE,
                             source_samples=SOURCE_SAMPLES,
                             learning_rate=LEARNING_RATE,
                             attack_iterations=ATTACK_ITERATIONS,
                             model_path=MODEL_PATH,
                             targeted=TARGETED,
                             noise_output=NOISE_OUTPUT):
    """
  MNIST tutorial for Adversarial Training
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using FGSM - BIM - MIM approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    fgsm = FastGradientMethod(model, sess=sess)
    bim = BasicIterativeMethod(model, sess=sess)
    mim = MomentumIterativeMethod(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    bim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    mim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }

    adv_fgsm = fgsm.generate_np(adv_inputs, **fgsm_params)
    adv_bim = bim.generate_np(adv_inputs, **bim_params)
    adv_mim = mim.generate_np(adv_inputs, **mim_params)
    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_fgsm_accuracy = model_eval(sess,
                                       x,
                                       y,
                                       preds,
                                       adv_fgsm,
                                       adv_ys,
                                       args=eval_params)
        adv_bim_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_bim,
                                      adv_ys,
                                      args=eval_params)
        adv_mim_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_mim,
                                      adv_ys,
                                      args=eval_params)

    else:
        if viz_enabled:
            err_fgsm = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_fgsm,
                                  y_test[idxs],
                                  args=eval_params)
            err_bim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_bim,
                                 y_test[idxs],
                                 args=eval_params)
            err_mim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_mim,
                                 y_test[idxs],
                                 args=eval_params)
            adv_fgsm_accuracy = 1 - err_fgsm
            adv_bim_accuracy = 1 - err_bim
            adv_mim_accuracy = 1 - err_mim
        else:
            err_fgsm = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_fgsm,
                                  y_test[:source_samples],
                                  args=eval_params)
            err_bim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_bim,
                                 y_test[:source_samples],
                                 args=eval_params)
            err_mim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_mim,
                                 y_test[:source_samples],
                                 args=eval_params)

            adv_fgsm_accuracy = 1 - err_fgsm
            adv_bim_accuracy = 1 - err_bim
            adv_mim_accuracy = 1 - err_mim

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. (FGSM) examples {0:.4f}'.format(
        adv_fgsm_accuracy))
    report.clean_train_adv_fgsm_eval = 1. - adv_fgsm_accuracy
    print('Avg. rate of successful adv. (BIM) examples {0:.4f}'.format(
        adv_bim_accuracy))
    report.clean_train_adv_bim_eval = 1. - adv_bim_accuracy
    print('Avg. rate of successful adv. (MIM) examples {0:.4f}'.format(
        adv_mim_accuracy))
    report.clean_train_adv_mim_eval = 1. - adv_mim_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed_fgsm = np.mean(
        np.sum((adv_fgsm - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (FGSM) perturbations {0:.4f}'.format(
        percent_perturbed_fgsm))
    percent_perturbed_bim = np.mean(
        np.sum((adv_bim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (BIM) perturbations {0:.4f}'.format(
        percent_perturbed_bim))
    percent_perturbed_mim = np.mean(
        np.sum((adv_mim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (MIM) perturbations {0:.4f}'.format(
        percent_perturbed_mim))

    ###########################################################################
    # Adversarial Training
    ###########################################################################

    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

    fgsm2 = FastGradientMethod(model, sess=sess)

    # bim2 = BasicIterativeMethod(model, sess=sess)
    # mim2 = MomentumIterativeMethod(model, sess=sess)

    def attack_fgsm(x):
        return fgsm2.generate(adv_inputs, **fgsm_params)

    # def attack_bim(x):
    #   return bim2.generate(adv_inputs, **bim_params)
    # def attack_mim(x):
    #   return mim2.generate(adv_inputs, **mim_params)

    preds2 = model2.get_logits(x)
    loss2_fgsm = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)
    # loss2_bim = CrossEntropy(model2, smoothing=0.1, attack=attack_bim)
    # loss2_mim = CrossEntropy(model2, smoothing=0.1, attack=attack_mim)

    train(sess, loss2_fgsm, x_train, y_train, args=train_params, rng=rng)
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on adversarial fgsm test examples: {0}'.format(
        accuracy))
    report.clean_train_clean_eval = accuracy
    print("Defined TensorFlow model graph.")

    adv_fgsm_accuracy = model_eval(sess,
                                   x,
                                   y,
                                   preds,
                                   adv_fgsm,
                                   adv_ys,
                                   args=eval_params)
    adv_bim_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_bim,
                                  adv_ys,
                                  args=eval_params)
    adv_mim_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_mim,
                                  adv_ys,
                                  args=eval_params)

    # Close TF session
    sess.close()

    return report
Ejemplo n.º 31
0
def zoo(viz_enabled=VIZ_ENABLED,
        nb_epochs=NB_EPOCHS,
        batch_size=BATCH_SIZE,
        source_samples=SOURCE_SAMPLES,
        learning_rate=LEARNING_RATE,
        attack_iterations=ATTACK_ITERATIONS,
        model_path=MODEL_PATH,
        targeted=TARGETED):
    """
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if DATASET == 'MNIST':
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000
        ds = dataset.MNIST(train_start=train_start,
                           train_end=train_end,
                           test_start=test_start,
                           test_end=test_end,
                           center=False)
    elif DATASET == 'SVHN':
        train_start = 0
        train_end = 73257
        test_start = 0
        test_end = 26032
        ds = dataset.SVHN(train_start=train_start,
                          train_end=train_end,
                          test_start=test_start,
                          test_end=test_end)
    elif DATASET == 'CIFAR10':
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000
        ds = dataset.CIFAR10(train_start=train_start,
                             train_end=train_end,
                             test_start=test_start,
                             test_end=test_end,
                             center=False)

    x_train, y_train, x_test, y_test = ds.get_set('train') + ds.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN(DATASET, nb_classes, nb_filters,
                          (None, img_rows, img_cols, nchannels))
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 10, 22])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a Zoo attack object
    zoo = Zoo(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    zoo_params = {
        'binary_search_steps': BINARY_SEARCH_STEPS,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': ZOO_LEARNING_RATE,
        'batch_size':
        source_samples * nb_classes if targeted else source_samples,
        'initial_const': INIT_CONST,
        'solver': SOLVER,
        'image_shape': [img_rows, img_cols, nchannels],
        'nb_classes': nb_classes
    }

    adv = zoo.generate_np(adv_inputs, **zoo_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(
                sess, x, y, preds, adv, y_test[idxs], args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          adv,
                                          y_test[:source_samples],
                                          args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
Ejemplo n.º 32
0
def cifar10_tutorial(train_start=0,
                     train_end=50000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """
  CIFAR10 cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.13, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)

    if clean_train:
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # save model
        #saver = tf.train.Saver()
        #saver.save(sess, "./checkpoint_dir/clean_model_100.ckpt")

        # load model and compute testing accuracy
    if testing:
        tf_model_load(sess, file_path="./checkpoint_dir/clean_model_100.ckpt")
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)

    # Evaluate the accuracy of the CIFAR10 model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

    # generate and show adversarial samples
    x_test_adv = np.zeros(shape=x_test.shape)

    for i in range(10):
        x_test_adv[i * 1000:(i + 1) * 1000] = adv_x.eval(
            session=sess, feed_dict={x: x_test[i * 1000:(i + 1) * 1000]})

    # implement anisotropic diffusion on adversarial samples
    x_test_filtered = np.zeros(shape=x_test_adv.shape)
    for i in range(y_test.shape[0]):
        x_test_filtered[i] = filter.anisotropic_diffusion(x_test_adv[i])

    # implement median on adversarial samples
    # x_test_filtered_med = np.zeros(shape=x_test_adv.shape)
    # for i in range(y_test.shape[0]):
    #     x_test_filtered_med[i] = medfilt(x_test_filtered_ad[i], kernel_size=(3,3,1))

    acc = model_eval(sess,
                     x,
                     y,
                     preds,
                     x_test_filtered,
                     y_test,
                     args=eval_params)
    print("acc after anisotropic diffusion is {}".format(acc))

    return report