Example #1
def ld_mnist():
    """Load training and test data."""
    train_transforms = torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor()])
    test_transforms = torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor()])

    # Load MNIST dataset
    train_dataset = MNISTDataset(root="/tmp/data", transform=train_transforms)
    test_dataset = MNISTDataset(root="/tmp/data",
                                train=False,
                                transform=test_transforms)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=128,
                                               shuffle=True,
                                               num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=128,
                                              shuffle=False,
                                              num_workers=2)
    return EasyDict(train=train_loader, test=test_loader)
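A minimal usage sketch for the loader above (assuming EasyDict comes from the easydict package and MNISTDataset downloads to /tmp/data; the variable names are illustrative):

data = ld_mnist()
for x, y in data.train:
    # x: float tensor of shape (128, 1, 28, 28) in [0, 1]; y: int labels of shape (128,)
    print(x.shape, y.shape)
    break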
Example #2
def main():
    global args
    args = get_args()
    torch.manual_seed(args.seed)
    criterion = nn.CrossEntropyLoss()
    print(args)

    if len(args.gpu) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        use_CUDA = True
    else:
        use_CUDA = False

    cudnn.benchmark = True

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
    }

    if args.dataset == 'mnist':
        train_dataset = MNISTDataset(split='train', seed=args.seed)
        val_dataset = MNISTDataset(split='val', seed=args.seed)
        input_channel = 1
    elif args.dataset == 'cifar10':
        train_dataset = CIFARDataset(split='train',
                                     seed=args.seed,
                                     transform=data_transforms['train'],
                                     percent=args.percent)
        val_dataset = CIFARDataset(split='val',
                                   seed=args.seed,
                                   transform=data_transforms['val'],
                                   percent=args.percent)
        input_channel = 3
    elif args.dataset == 'cifar100':
        train_dataset = CIFAR100Dataset(split='train',
                                        seed=args.seed,
                                        transform=data_transforms['train'],
                                        percent=args.percent)
        val_dataset = CIFAR100Dataset(split='val',
                                      seed=args.seed,
                                      transform=data_transforms['val'],
                                      percent=args.percent)
        input_channel = 3
    else:
        raise NotImplementedError

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.val_batch_size,
                            num_workers=args.num_workers)

    model = get_model(args,
                      input_channel=input_channel,
                      num_classes=args.num_classes)

    optimizers = get_optimizers(model, args.components, args.lr, args.gamma)

    #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [80], gamma=0.5, last_epoch=-1)

    save_path = os.path.join(args.prefix, args.modeldir)
    if not os.path.exists(save_path):
        os.makedirs(save_path)  # os.mkdir would fail if args.prefix does not exist yet
    writer = SummaryWriter(save_path)

    best_prec = 0
    for epoch in range(args.epochs):
        train(model,
              input_channel,
              optimizers,
              criterion,
              args.components,
              train_loader,
              val_loader,
              epoch,
              writer,
              args,
              use_CUDA=use_CUDA,
              clamp=args.clamp,
              num_classes=args.num_classes)
        loss, prec = val(model, val_loader, criterion, epoch, writer, use_CUDA)
        torch.save(model, os.path.join(save_path, 'checkpoint.pth.tar'))
        if prec > best_prec:
            torch.save(model, os.path.join(save_path, 'model_best.pth.tar'))
            best_prec = prec
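Because the loop above saves whole model objects with torch.save, a saved checkpoint can be restored directly with torch.load; a minimal sketch, assuming the same save_path as above and that the model class is importable:

import os
import torch

# Reload the best checkpoint written by the training loop above.
best_model = torch.load(os.path.join(save_path, 'model_best.pth.tar'))
best_model.eval()  # switch to inference mode before evaluating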
Example #3
import tensorflow as tf

from datasets import MNISTDataset
from time import time

# get the data
(train_imgs, train_lbls), (test_imgs,
                           test_lbls) = tf.keras.datasets.mnist.load_data()
mnist = MNISTDataset(train_imgs.reshape((-1, 784)),
                     train_lbls,
                     test_imgs.reshape((-1, 784)),
                     test_lbls,
                     batch_size=256,
                     seed=int(time()))

# define the model first, from input to output

# let's use fewer layers...
n_units = 100
n_layers = 2

# just set up a "chain" of hidden layers
layers = []
for layer in range(n_layers):
    layers.append(
        tf.keras.layers.Dense(
            n_units,
            activation=tf.nn.relu,
            kernel_initializer=tf.initializers.RandomUniform(minval=-0.01,
                                                             maxval=0.01),
            bias_initializer=tf.initializers.constant(0.001)))
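The snippet ends after building the layer list; a hedged sketch of how such a chain is typically wired up with tf.keras, feeding each Dense layer into the next (the Input and 10-class head are assumptions, not part of the original):

inputs = tf.keras.Input(shape=(784,))
x = inputs
for layer in layers:
    x = layer(x)  # each hidden layer consumes the previous layer's output
logits = tf.keras.layers.Dense(10)(x)  # assumed 10-class output head
model = tf.keras.Model(inputs, logits)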
Example #4
import tensorflow as tf  # needed for tf.__version__ and tf.keras below
import numpy as np
import matplotlib.pyplot as plt
from datasets import MNISTDataset

tf.__version__

mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
plt.imshow(train_images[15], cmap='Greys_r')
data = MNISTDataset(train_images.reshape([-1, 784]),
                    train_labels,
                    test_images.reshape([-1, 784]),
                    test_labels,
                    batch_size=128)

# Setting up parameters

train_steps = 1000
learning_rate = 1e-4
n_input = 28 * 28  # input layer (28x28 pixels)
n_hidden1 = 512  # 1st hidden layer
n_hidden2 = 256  # 2nd hidden layer
n_hidden3 = 128  # 3rd hidden layer
n_output = 10  # output layer (0-9 digits)
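The cell above only declares layer sizes; a minimal sketch of a network matching them, written with tf.keras as an assumption (the original notebook likely continued with explicit weight variables):

model = tf.keras.Sequential([
    tf.keras.layers.Dense(n_hidden1, activation='relu', input_shape=(n_input,)),
    tf.keras.layers.Dense(n_hidden2, activation='relu'),
    tf.keras.layers.Dense(n_hidden3, activation='relu'),
    tf.keras.layers.Dense(n_output),  # logits for the 10 digit classes
])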
Example #5
File: fail2.py Project: tingled/idl
import tensorflow as tf

from datasets import MNISTDataset
from time import time

# get the data
mnist = MNISTDataset("mnist_data", batch_size=256, seed=int(time()))

# define the model first, from input to output
imgs = tf.placeholder(tf.float32, shape=[None, 28 * 28])

n_hidden = 100
n_layers = 8
w_range = 0.1
hidden = imgs
for layer in range(n_layers):
    name = "hidden_{}".format(layer)
    hidden = tf.layers.dense(
        hidden,
        n_hidden,
        activation=tf.nn.relu,
        #kernel_initializer=tf.random_uniform_initializer(minval=-w_range, maxval=w_range),
        kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=True),
        bias_initializer=tf.constant_initializer(0.01),
        name=name)
    tf.summary.histogram(name + "_hist", hidden)
logits = tf.layers.dense(hidden,
                         10,
                         kernel_initializer=tf.random_uniform_initializer(
                             minval=-w_range, maxval=w_range),
                         bias_initializer=tf.zeros_initializer())
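The file is truncated after the logits layer; what typically follows in TF1-style code is a cross-entropy loss and a training op, sketched here as an assumption:

# Hypothetical continuation: loss and training op for the logits above.
lbls = tf.placeholder(tf.int64, shape=[None])
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=lbls, logits=logits))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
merged = tf.summary.merge_all()  # collects the histograms registered above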
Example #6
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, calculate_mean_confidence, \
        calculate_accuracy

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
        FLAGS.image_size = 32  # Redundant for the Current Attack.
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
        FLAGS.image_size = 224  # Redundant for the Current Attack.

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # Randomized optimizations
    if FLAGS.dataset_name != "ImageNet":
        all_idx = np.arange(10000)
        np.random.shuffle(all_idx)
        selected_idx = all_idx[:(FLAGS.nb_examples * 2)]
        X_test_all, Y_test_all = X_test_all[selected_idx], Y_test_all[
            selected_idx]

    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size,
                              dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    sq_list = FLAGS.squeezers.split(';')
    print(" Squeezers used for EOT :", sq_list)

    x_s = []
    squeezers = []
    models = []
    for squeezer in sq_list:
        x_s.append(
            tf.placeholder(tf.float32,
                           shape=(None, dataset.image_size, dataset.image_size,
                                  dataset.num_channels)))
        if squeezer.startswith("median"):
            squeezers.append(lambda x: x)
            with tf.variable_scope(FLAGS.model_name + squeezer):
                cur_model = dataset.load_model_by_name(
                    FLAGS.model_name,
                    logits=False,
                    input_range_type=1,
                    pre_filter=get_squeezer_by_name(squeezer, 'tensorflow'))
                cur_model.compile(loss='categorical_crossentropy',
                                  optimizer='sgd',
                                  metrics=['acc'])
                models.append(cur_model)
        else:
            squeezers.append(get_squeezer_by_name(squeezer, 'python'))
            with tf.variable_scope(FLAGS.model_name + "local" + squeezer):
                cur_model = dataset.load_model_by_name(FLAGS.model_name,
                                                       logits=False,
                                                       input_range_type=1)
                cur_model.compile(loss='categorical_crossentropy',
                                  optimizer='sgd',
                                  metrics=['acc'])
                models.append(cur_model)

    with tf.variable_scope(FLAGS.model_name + "vanilla"):
        model_vanilla = dataset.load_model_by_name(FLAGS.model_name,
                                                   logits=False,
                                                   input_range_type=1)
        model_vanilla.compile(loss='categorical_crossentropy',
                              optimizer='sgd',
                              metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")

    # We use the Vanilla Model here for Prediction
    print("Shape of X_test_all:", X_test_all.shape)
    Y_pred_all = model_vanilla.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class

    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(
                Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples /
                                            Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [
                    correct_idx[i] for i in correct_and_selected_idx
                ]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" %
          selected_example_idx_ranges)
    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[
        selected_idx], Y_pred_all[selected_idx]

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' %
          (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' %
          (mean_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all

    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(
        str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
            (task['dataset_name'], task['test_set_selected_length'], task['test_set_selected_idx_hash'][:5],
             task['model_name'], )

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.

    from utils.squeeze import reduce_precision_py
    from utils.parameter_parser import parse_params
    attack_string_hash = hashlib.sha1(
        FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from datasets.datasets_utils import get_next_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    # Materialize the filter: a Py3 filter object is single-use, but this list
    # is iterated again below (visualization, robustness, detection).
    attack_string_list = list(filter(lambda x: len(x) > 0,
                                     FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    # NOTE : At the moment we only support single attacks and single detectors.
    for attack_string in attack_string_list:
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            elif targeted == False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        # Note that we use the attack model here instead of the vanilla model
        # Note that we pass in the Squeezer function for BPDA
        X_test_adv = eot_adversarial_attack(sess, model_vanilla, models, x, y,
                                            x_s, X_test, Y_test_target,
                                            attack_params, squeezers)

        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)

        X_test_adv_list.append(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model_vanilla.predict(X_test_adv)
        predictions_fpath = os.path.join(predictions_folder,
                                         "%s.npy" % attack_string)
        np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model_vanilla.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)

        # Y_test_adv_discret_pred is for the vanilla model
        rec = evaluate_adversarial_examples(X_test, Y_test, X_test_adv_discret,
                                            Y_test_target.copy(), targeted,
                                            Y_test_adv_discret_pred)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['discretization'] = True
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(FLAGS.result_folder,
                                                "%s_attacks_%s_evaluation.csv" % \
                                                (task_id, attack_string_hash))
    fieldnames = [
        'dataset_name', 'model_name', 'attack_string', 'discretization',
        'success_rate', 'mean_confidence', 'mean_l2_dist', 'mean_li_dist',
        'mean_l0_dist_value', 'mean_l0_dist_pixel'
    ]
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows
        if FLAGS.test_mode or FLAGS.balance_sampling:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)

        legitimate_examples = X_test[selected_idx_vis]

        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)

        img_fpath = os.path.join(
            FLAGS.result_folder,
            '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        #show_imgs_in_rows(rows, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)

        # TODO: output the prediction and confidence for each example, both legitimate and adversarial.

    # 6. Evaluate robust classification techniques.
    # Example: --robustness \
    #           "Base;FeatureSqueezing?squeezer=bit_depth_1;FeatureSqueezing?squeezer=median_filter_2;"
    if FLAGS.robustness != '':
        """
        Test the accuracy with robust classifiers.
        Evaluate the accuracy on all the legitimate examples.
        """
        from robustness import evaluate_robustness
        result_folder_robustness = os.path.join(FLAGS.result_folder,
                                                "robustness")
        fname_prefix = "robustness_summary"
        evaluate_robustness(FLAGS.robustness, model_vanilla, Y_test_all, X_test_all, Y_test, \
                            attack_string_list, X_test_adv_discretized_list,
                            fname_prefix, selected_idx_vis, result_folder_robustness)

    # 7. Detection experiment.
    # Example: --detection "FeatureSqueezing?distance_measure=l1&squeezers=median_smoothing_2,bit_depth_4,bilateral_filter_15_15_60;"
    if FLAGS.detection != '':
        from detections.base import DetectionEvaluator

        result_folder_detection = os.path.join(FLAGS.result_folder,
                                               "detection")
        csv_fname = "detection_summary.csv"
        de = DetectionEvaluator(model_vanilla, result_folder_detection,
                                csv_fname, FLAGS.dataset_name)
        Y_test_all_pred = model_vanilla.predict(X_test_all)
        de.build_detection_dataset(X_test_all, Y_test_all, Y_test_all_pred,
                                   selected_idx, X_test_adv_discretized_list,
                                   Y_test_adv_discretized_pred_list,
                                   attack_string_list, attack_string_hash,
                                   FLAGS.clip, Y_test_target_next,
                                   Y_test_target_ll)
        de.evaluate_detections(FLAGS.detection)
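The uint8 discretization step calls reduce_precision_py(X, 256); a minimal numpy sketch of what such a precision reduction does (an approximation of the helper, not the project's exact code):

import numpy as np

def reduce_precision_sketch(x, npp=256):
    # Quantize values in [0, 1] onto npp evenly spaced levels (the uint8 grid).
    levels = npp - 1
    return np.rint(x * levels) / levels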
Example #7
def main():
    import torch
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    ### Data loading
    # mnist = MNISTDataset.load('haar_mnist.pkl')
    # mnist = MNISTDataset.load('filtered_mnist.pkl')
    mnist = MNISTDataset.load()
    (Xtr, Ytr), (Xts, Yts) = mnist.get_train_test(center=True, reduce=True)
    m = 10
    X, Y = Xtr[:m], Ytr[:m]
    X_val, Y_val = Xtr[-10_000:], Ytr[-10_000:]

    ### Choice of encoder
    # encoder = LabelEncoder.load_encodings('js_without_0', convert_to_int=True)
    # encoder = LabelEncoder.load_encodings('mario')
    # encoder = LabelEncoder.load_encodings('ideal_mnist', convert_to_int=True)
    encoder = OneHotEncoder(Ytr)
    # encoder = AllPairsEncoder(Ytr)

    ### Choice of weak learner
    # weak_learner = WLThresholdedRidge(threshold=.5)
    # weak_learner = WLRidge
    f_gen = WeightFromBankGenerator(filter_bank=Xtr[-3000:],
                                    filters_shape=(11,11),
                                    filter_processing=center_weight)
    filters = Filters(n_filters=3,
                      weights_generator=f_gen,
                    #   locality=3,
                      maxpool_shape=(3,3))
    # Xtr, X_val, Xts = RandomConvolution.format_data(Xtr), RandomConvolution.format_data(X_val),RandomConvolution.format_data(Xts)
    # Xtr, X_val, Xts = Xtr.to('cuda'), X_val.to('cuda'), Xts.to('cuda')
    weak_learner = RandomConvolution(filters=filters, weak_learner=Ridge)
    # weak_learner = MulticlassDecisionTree(max_n_leaves=4)
    # weak_learner = MulticlassDecisionStump
    # sorted_X, sorted_X_idx = weak_learner.sort_data(X)

    ### Callbacks
    # filename = 'haar_onehot_ds_'
    # filename = 'ideal_mnist_ds_'
    filename = 'test'
    ckpt = ModelCheckpoint(filename=filename+'_{round}.ckpt', dirname='./results', save_last=True)
    logger = CSVLogger(filename=filename+'_log.csv', dirname='./results/log')
    zero_risk = BreakOnZeroRiskCallback()
    tracker = BestRoundTrackerCallback(quantity='valid_acc', monitor='max')
    callbacks = [ckpt,
                logger,
                zero_risk,
                # tracker,
                ]

    ### Fitting the model
    qb = QuadBoostMHCR(weak_learner, encoder=encoder, dampening=1)
    qb.fit(X, Y, max_round_number=2, patience=10,
            X_val=X_val, Y_val=Y_val,
            callbacks=callbacks,
            # n_jobs=1, sorted_X=sorted_X, sorted_X_idx=sorted_X_idx,
            )
    print(f'Best round recap:\nBoosting round {qb.best_round.step_number+1:03d} | Train acc: {qb.best_round.train_acc:.3%} | Valid acc: {qb.best_round.valid_acc:.3%} | Risk: {qb.best_round.risk:.3f}')
    print(f'Test accuracy on best model: {qb.evaluate(Xts, Yts):.3%}')
    print(f'Test accuracy on last model: {qb.evaluate(Xts, Yts, mode="last"):.3%}')
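OneHotEncoder(Ytr) above maps integer labels to one-hot codewords that the booster regresses against; a minimal numpy sketch of the idea (10 classes assumed; this is not the project's class):

import numpy as np

def one_hot_sketch(y, n_classes=10):
    # Row i is the one-hot codeword for label y[i].
    return np.eye(n_classes)[np.asarray(y)]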
Example #8
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, calculate_mean_confidence, calculate_accuracy, calculate_real_untargeted_mean_confidence

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    sess = load_tf_session()
    #keras.backend.set_learning_phase(0)
    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size,
                              dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name,
                                           logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    #X_test_all = scipy.ndimage.rotate(X_test_all, 5, reshape=False, axes=(2, 1))
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all, _, _, _ = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class

    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(
                Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples /
                                            Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [
                    correct_idx[i] for i in correct_and_selected_idx
                ]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" %
          selected_example_idx_ranges)
    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[
        selected_idx], Y_pred_all[selected_idx]

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected, max_conf_selected, min_conf_selected, std_conf_selected = calculate_mean_confidence(
        Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' %
          (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' %
          (mean_conf_selected))
    print('max confidence on ground truth classes, selected %.4f\n' %
          (max_conf_selected))
    print('min confidence on ground truth classes, selected %.4f\n' %
          (min_conf_selected))
    print('std confidence on ground truth classes, selected %.4f\n' %
          (std_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all

    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(
        str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
            (task['dataset_name'], task['test_set_selected_length'], task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples
    from utils.squeeze import reduce_precision_py
    from utils.parameter_parser import parse_params
    attack_string_hash = hashlib.sha1(
        FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from datasets.datasets_utils import get_next_class, get_least_likely_class, get_most_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)
    Y_test_target_ml = get_most_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    # Materialize the filter so the list survives reuse after the attack loop (Py3).
    attack_string_list = list(filter(lambda x: len(x) > 0,
                                     FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder,
                                        "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
                #Y_test_target = Y_test.copy()
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
                #Y_test_target = Y_test.copy()
                #print (Y_test_target_ll)
            elif targeted == 'most':
                Y_test_target = Y_test_target_ml
                #Y_test_target = Y_test.copy()
                #print (Y_test_target_ml)
            elif targeted == False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()
            Y_test_target_all = Y_test_all.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)

        X_test_adv, aux_info = maybe_generate_adv_examples(
            sess,
            model,
            x,
            y,
            X_test,
            Y_test_target,
            attack_name,
            attack_params,
            use_cache=x_adv_fpath,
            verbose=FLAGS.verbose,
            attack_log_fpath=attack_log_fpath)

        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)

        X_test_adv_list.append(X_test_adv)

        if isinstance(aux_info, float):
            duration = aux_info
        else:
            duration = aux_info['duration']

        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model.predict(X_test_adv)
        #predictions_fpath = os.path.join(predictions_folder, "%s.npy"% attack_string)
        #np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        #import utils.squeeze as squeezer

        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        #X_test_adv_discret = reduce_precision_py(X_test_adv, 2)
        X_test_adv_discretized_list.append(X_test_adv_discret)

        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        #Y_test_adv_discret_pred1 = to_categorical(np.argmax(model1.predict(X_test_adv_discret), axis=1))

        from LID.extract_artifacts_obfus import get_lid
        from LID.util_obfus import get_noisy_samples, random_split, block_split, train_lr, compute_roc
        from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
        from sklearn.preprocessing import scale, MinMaxScaler, StandardScaler

        #from LID.extract_artifact import *
        #from LID_util import *

        X_test_noisy = get_noisy_samples(X_test, X_test_adv, 'mnist', 'fgsm')

        artifacts, labels = get_lid(model, X_test, X_test_noisy,
                                    X_test_adv_discret, 20, 100, 'mnist')

        #X=artifacts
        #Y=labels

        print(X_test_noisy.shape)
        #print (artifacts.shape)

        # standarization
        scaler = MinMaxScaler().fit(artifacts)
        artifacts = scaler.transform(artifacts)
        # X = scale(X) # Z-norm

        # test attack is the same as training attack
        X_train_lid, Y_train_lid, X_test_lid, Y_test_lid = block_split(
            artifacts, labels)

        ## Build detector
        # print("LR Detector on [dataset: %s, train_attack: %s, test_attack: %s] with:" %
        #       (args.dataset, args.attack, args.test_attack))
        lr = train_lr(X_train_lid, Y_train_lid)

        ## Evaluate detector
        y_pred_lid = lr.predict_proba(X_test_lid)[:, 1]
        y_label_pred = lr.predict(X_test_lid)

        Y_test_lid = np.reshape(Y_test_lid, Y_test_lid.shape[0])

        # AUC
        _, _, auc_score = compute_roc(Y_test_lid[:100],
                                      y_pred_lid[:100],
                                      plot=False)
        precision = precision_score(Y_test_lid[:100], y_label_pred[:100])
        recall = recall_score(Y_test_lid[:100], y_label_pred[:100])

        y_label_pred = lr.predict(X_test_lid[:100])
        acc = accuracy_score(Y_test_lid[:100], y_label_pred[:100])
        print('start measuring LID')
        print(
            'Detector ROC-AUC score: %0.4f, accuracy: %.4f, precision: %.4f, recall(TPR): %.4f'
            % (auc_score, acc, precision, recall))

        from detections.base import evalulate_detection_test

        a, b, c, d, e = evalulate_detection_test(Y_test_lid[:100],
                                                 y_label_pred[:100])
        f1 = f1_score(Y_test_lid[:100], y_label_pred)

        print(
            'SAE_acc: %0.4f, tpr: %.4f, fpr: %.4f, fdr (1- precision): %.4f, fbr (official name false omission rate): %.4f, f1 score: %.4f'
            % (a, b, c, d, e, f1))
        print('end measuring LID')

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(FLAGS.result_folder,
            "%s_attacks_%s_evaluation.csv" % \
            (task_id, attack_string_hash))
    fieldnames = [
        'dataset_name', 'model_name', 'attack_string', 'duration_per_sample',
        'discretization', 'success_rate', 'mean_confidence', 'mean_l2_dist',
        'mean_li_dist', 'mean_l0_dist_value', 'mean_l0_dist_pixel'
    ]
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    # 7. Detection experiment.
    # Example: --detection "FeatureSqueezing?distance_measure=l1&squeezers=median_smoothing_2,bit_depth_4,bilateral_filter_15_15_60;"
    if FLAGS.detection != '':
        from detections.base import DetectionEvaluator

        result_folder_detection = os.path.join(FLAGS.result_folder,
                                               "detection")
        csv_fname = "%s_attacks_%s_detection.csv" % (task_id,
                                                     attack_string_hash)
        de = DetectionEvaluator(model, result_folder_detection, csv_fname,
                                FLAGS.dataset_name)
        Y_test_all_pred = model.predict(X_test_all)
        de.build_detection_dataset(X_test_all, Y_test_all, Y_test_all_pred,
                                   selected_idx, X_test_adv_discretized_list,
                                   Y_test_adv_discretized_pred_list,
                                   attack_string_list, attack_string_hash,
                                   FLAGS.clip, Y_test_target_ml,
                                   Y_test_target_ll)
        de.evaluate_detections(FLAGS.detection)
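compute_roc from LID.util_obfus is not shown; a hedged sketch of an equivalent ROC-AUC computation with scikit-learn, matching the (fpr, tpr, auc_score) unpacking used above:

from sklearn.metrics import roc_curve, auc

def compute_roc_sketch(y_true, y_score):
    # Returns the ROC curve plus its area, as the call site above expects.
    fpr, tpr, _ = roc_curve(y_true, y_score)
    return fpr, tpr, auc(fpr, tpr)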
Example #9
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset, LFWDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, calculate_mean_confidence, calculate_accuracy
    from utils.parameter_parser import parse_params
    from utils.squeeze import reduce_precision_py  # used below; imported as in the other examples
    from skimage.transform import rescale  # assumed source of rescale() used below

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
    elif FLAGS.dataset_name == "LFW":
        dataset = LFWDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.

    keras.backend.set_learning_phase(0)

    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name,
                                           logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['acc'])

    X_train_all, Y_train_all = dataset.get_train_dataset()
    if FLAGS.model_name in [
            'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110'
    ] and FLAGS.dataset_name == 'CIFAR-10':
        # for resnet
        x_train_mean = np.mean(X_train_all, axis=0)
        X_test_all -= x_train_mean

    # 3. Evaluate the trained model.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    if FLAGS.attacks:
        from attacks import load_attack_input

        #attack_string = filter(lambda x: len(x) > 0, FLAGS.attacks.lower())
        attack_string = FLAGS.attacks.lower()

        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        selected_idx = correct_idx[:100]

        X_test_all = load_attack_input(FLAGS.dataset_name, attack_string)

        Y_test_all = Y_test_all[selected_idx]

    if FLAGS.output_verifier != '' and FLAGS.attacks != '':
        for ele in FLAGS.output_verifier.split(';'):
            if ele in [
                    'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110'
            ] and FLAGS.dataset_name == 'CIFAR-10' and FLAGS.model_name not in [
                    'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110'
            ]:
                x_train_mean = np.mean(X_train_all, axis=0)
                X_test_all -= x_train_mean
                break

    # 4. XEnsemble defense.

    input_verified = X_test_all

    # input verifier
    if FLAGS.input_verifier != '':
        from input_verifier_method import get_inverifier_by_name

        inverifier_names = [
            ele.strip() for ele in FLAGS.input_verifier.split(';')
            if ele.strip() != ''
        ]

        for inverifier_name in inverifier_names:

            inverifier = get_inverifier_by_name(inverifier_name, 'python')
            input_verified = np.append(input_verified,
                                       inverifier(X_test_all),
                                       axis=0)

    if FLAGS.output_verifier == '':

        # n_iter: number of verified copies stacked in input_verified
        n_iter = input_verified.shape[0] / X_test_all.shape[0]
        batch_iter = X_test_all.shape[0]

        Y_pred = model.predict(input_verified[0:X_test_all.shape[0]])

        output = model.predict(input_verified[0:X_test_all.shape[0]])
        for i in range(int(n_iter)):
            if i > 0:
                output = np.append(output,
                                   model.predict(
                                       input_verified[i * batch_iter:(i + 1) *
                                                      batch_iter]),
                                   axis=0)

                Y_pred = Y_pred + model.predict(
                    input_verified[i * batch_iter:(i + 1) * batch_iter])

        Y_pred_inverified = Y_pred / n_iter  # TODO: only majority voting is provided here

        from datasets.datasets_utils import calculate_msqueezer_accuracy

        avg = np.zeros((Y_pred_inverified.shape[0], dataset.num_classes))
        for idx in range(Y_pred_inverified.shape[0]):

            if np.max(Y_pred_inverified[idx]) >= 0.6:
                avg[idx] = Y_pred_inverified[idx]
            elif np.max(Y_pred_inverified[idx]) < 0.6:
                avg[idx] = Y_pred_inverified[idx] + 1

        accuracy, _, alert_bad = calculate_msqueezer_accuracy(avg, Y_test_all)

        print(
            "Conf-L1 Test accuracy is of %.4f, where correct pred: %.4f, detection: %.4f of the input verifier layer"
            % (accuracy + alert_bad / Y_pred_inverified.shape[0], accuracy,
               alert_bad / Y_pred_inverified.shape[0]))

        accuracy = calculate_accuracy(Y_pred_inverified, Y_test_all)

        print('Majority Voting Test accuracy %.4f' % (accuracy))

    # output verifier
    if FLAGS.output_verifier != '':

        Y_pred_model_verified = np.zeros(
            (X_test_all.shape[0], dataset.num_classes))

        model_verifier_names = [
            ele.strip() for ele in FLAGS.output_verifier.split(';')
            if ele.strip() != ''
        ]
        selected_model_verifier_names = model_verifier_names

        size_base = len(model_verifier_names)
        size_team = size_base

        prediction_base = np.zeros(
            (size_base, Y_test_all.shape[0], Y_test_all.shape[1]))
        prediction_base_train = np.zeros(
            (size_base, 5000, Y_train_all.shape[1]))

        model_list = range(size_base)
        for i, model_verifier_name in enumerate(model_verifier_names):
            model_verifier = dataset.load_model_by_name(model_verifier_name,
                                                        logits=False,
                                                        input_range_type=1)

            prediction_base[i] = model_verifier.predict(X_test_all)

            locals()['model_verifier' + str(i)] = dataset.load_model_by_name(
                model_verifier_name, logits=False, input_range_type=1)
            prediction_base_train[i] = model_verifier.predict(
                X_train_all[:5000])

        model_list = [0, 1, 2]

        selected_model_verifier_names = []
        for i in range(len(model_list)):
            selected_model_verifier_names.append(model_verifier_names[i])

        #ensemble on selected models
        for m, model_verifier_name in enumerate(selected_model_verifier_names):
            model_verifier = dataset.load_model_by_name(model_verifier_name,
                                                        logits=False,
                                                        input_range_type=1)
            model_verifier.compile(loss='categorical_crossentropy',
                                   optimizer='sgd',
                                   metrics=['acc'])

            n_iter = input_verified.shape[0] / X_test_all.shape[0]
            batch_iter = X_test_all.shape[0]

            Y_pred = model_verifier.predict(
                input_verified[0:X_test_all.shape[0]])

            #output = model_verifier.predict(input_verified[0:X_test_all.shape[0]])

            for i in range(int(n_iter)):
                if i > 0:
                    #output=np.append(output,model_verifier.predict(input_verified[i*batch_iter:(i+1)*batch_iter]),axis=0)

                    Y_pred = Y_pred + model_verifier.predict(
                        input_verified[i * batch_iter:(i + 1) * batch_iter])

            majority_weight = np.ones(size_team)
            weighted_pred = np.zeros((size_team, 5000, Y_train_all.shape[1]))

            Y_pred_model_verified = Y_pred_model_verified + majority_weight[
                m] * Y_pred / n_iter

        Y_pred_model_verified = Y_pred_model_verified / np.sum(majority_weight)

        from datasets.datasets_utils import calculate_msqueezer_accuracy

        avg = np.zeros((Y_pred_model_verified.shape[0], dataset.num_classes))
        for idx in range(Y_pred_model_verified.shape[0]):

            if np.max(Y_pred_model_verified[idx]) >= 0.6:
                avg[idx] = Y_pred_model_verified[idx]
            elif np.max(Y_pred_model_verified[idx]) < 0.6:
                avg[idx] = Y_pred_model_verified[idx] + 1

        accuracy, _, alert_bad = calculate_msqueezer_accuracy(avg, Y_test_all)

        print(
            "Conf-L1 Test accuracy is of %.4f, where correct pred: %.4f, detection: %.4f"
            % (accuracy + alert_bad / Y_pred_model_verified.shape[0], accuracy,
               alert_bad / Y_pred_model_verified.shape[0]))

        accuracy = calculate_accuracy(Y_pred_model_verified, Y_test_all)

        print('Majority Voting Test accuracy %.4f' % (accuracy))

        #comparison

        try:
            #Adversarial training
            model_advt = dataset.load_model_by_name('cnn2_adv_trained',
                                                    logits=False,
                                                    input_range_type=1)
            pred_advt = model_advt(X_test_all)
            accuracy, _, alert_bad = calculate_msqueezer_accuracy(
                pred_advt, Y_test_all)
            print('Adversarial training Test accuracy %.4f' % (accuracy))

            # Defensive Distillation
            model_dd = dataset.load_model_by_name('distillation',
                                                  logits=False,
                                                  input_range_type=1)
            pred_dd = model_dd(X_test_all)
            accuracy, _, alert_bad = calculate_msqueezer_accuracy(
                pred_dd, Y_test_all)
            print('Defensive Distillation Test accuracy %.4f' % (accuracy))

            # Input transformation
            if FLAGS.dataset_name == 'MNIST':
                ensembles_size = 10
                crop_size = 24
                image_size = 28

            if FLAGS.dataset_name == 'CIFAR-10':
                ensembles_size = 10
                crop_size = 28
                image_size = 32

                start_max = image_size - crop_size

                ensembles_def_pred = 0
                for i in range(ensembles_size):
                    start_x = np.random.randint(0, start_max)
                    start_y = np.random.randint(0, start_max)
                    # boxes = [[start_y, start_x, start_y + frac, start_x + frac]]
                    X_test_all_crop = X_test_all[:,
                                                 start_x:start_x + crop_size,
                                                 start_y:start_y +
                                                 crop_size, :]

                    if FLAGS.dataset_name == 'MNIST':
                        X_test_all_rescale = np.zeros(
                            (X_test_all.shape[0], 28, 28, 1))
                    if FLAGS.dataset_name == 'CIFAR-10':
                        X_test_all_rescale = np.zeros(
                            (X_test_all.shape[0], 32, 32, 3))
                    # j: inner index, so the outer ensemble index i is not clobbered
                    for j in range(X_test_all_crop.shape[0]):
                        X_test_all_rescale[j] = rescale(
                            X_test_all_crop[j],
                            float(image_size) / crop_size)
                    X_test_all_discret_rescale = reduce_precision_py(
                        X_test_all_rescale,
                        256)  # need to put input into the ensemble
                    pred = model.predict(X_test_all_discret_rescale)

                    ensembles_def_pred = ensembles_def_pred + pred

                Y_defend_all = ensembles_def_pred / ensembles_size

                # All data should be discretized to uint8.

                X_test_all_discret = reduce_precision_py(X_test_all, 256)
                Y_test_all_discret_pred = model.predict(X_test_all_discret)
                accuracy, _, alert_bad = calculate_msqueezer_accuracy(
                    Y_test_all_discret_pred, Y_test_all)
                print('Input transformation ensemble Test accuracy %.4f' %
                      (accuracy))

        except:
            raise
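The 0.6 threshold above implements confidence-gated voting: rows whose averaged prediction is confident pass through unchanged, while low-confidence rows are offset so they register as detections; a standalone numpy sketch of the rule (names are illustrative):

import numpy as np

def confidence_gate_sketch(y_pred_avg, threshold=0.6):
    # Offset low-confidence rows, mirroring the `avg[idx] = ... + 1` flag above.
    gated = y_pred_avg.copy()
    low = y_pred_avg.max(axis=1) < threshold
    gated[low] += 1
    return gated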
Example #10
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, calculate_mean_confidence, calculate_accuracy

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)
    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size,
                              dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name,
                                           logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class

    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(
                Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples /
                                            Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [
                    correct_idx[i] for i in correct_and_selected_idx
                ]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" %
          selected_example_idx_ranges)
    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[
        selected_idx], Y_pred_all[selected_idx]

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' %
          (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' %
          (mean_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all

    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(
        str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
            (task['dataset_name'], task['test_set_selected_length'], task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples
    from utils.squeeze import reduce_precision_py
    from utils.parameter_parser import parse_params
    attack_string_hash = hashlib.sha1(
        FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from datasets.datasets_utils import get_next_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    # Materialize as a list: under Python 3, filter() returns a one-shot
    # iterator, and the attack strings are iterated again in step 6.
    attack_string_list = list(
        filter(lambda x: len(x) > 0,
               FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder,
                                        "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            elif targeted is False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)

        X_test_adv, aux_info = maybe_generate_adv_examples(
            sess,
            model,
            x,
            y,
            X_test,
            Y_test_target,
            attack_name,
            attack_params,
            use_cache=x_adv_fpath,
            verbose=FLAGS.verbose,
            attack_log_fpath=attack_log_fpath)

        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)

        X_test_adv_list.append(X_test_adv)

        if isinstance(aux_info, float):
            duration = aux_info
        else:
            duration = aux_info['duration']

        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model.predict(X_test_adv)
        predictions_fpath = os.path.join(predictions_folder,
                                         "%s.npy" % attack_string)
        np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)

        rec = evaluate_adversarial_examples(X_test, Y_test, X_test_adv_discret,
                                            Y_test_target.copy(), targeted,
                                            Y_test_adv_discret_pred)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = True
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(FLAGS.result_folder,
            "%s_attacks_%s_evaluation.csv" % \
            (task_id, attack_string_hash))
    fieldnames = [
        'dataset_name', 'model_name', 'attack_string', 'duration_per_sample',
        'discretization', 'success_rate', 'mean_confidence', 'mean_l2_dist',
        'mean_li_dist', 'mean_l0_dist_value', 'mean_l0_dist_pixel'
    ]
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows
        if FLAGS.test_mode or FLAGS.balance_sampling:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)

        legitimate_examples = X_test[selected_idx_vis]

        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)

        img_fpath = os.path.join(
            FLAGS.result_folder,
            '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        show_imgs_in_rows(rows, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)

        # TODO: output the prediction and confidence for each example, both legitimate and adversarial.

    # 6. Evaluate robust classification techniques.
    # Example: --robustness \
    #           "Base;FeatureSqueezing?squeezer=bit_depth_1;FeatureSqueezing?squeezer=median_filter_2;"
    if FLAGS.robustness != '':
        """
        Test the accuracy with robust classifiers.
        Evaluate the accuracy on all the legitimate examples.
        """
        from robustness import evaluate_robustness
        result_folder_robustness = os.path.join(FLAGS.result_folder,
                                                "robustness")
        fname_prefix = "%s_%s_robustness" % (task_id, attack_string_hash)
        if not FLAGS.visualize:
            # selected_idx_vis is otherwise defined only inside the
            # visualization step above; define it here to avoid a NameError.
            selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)
        evaluate_robustness(FLAGS.robustness, model, Y_test_all, X_test_all,
                            Y_test, attack_string_list,
                            X_test_adv_discretized_list, fname_prefix,
                            selected_idx_vis, result_folder_robustness)
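The targeted attacks above pick their target labels with get_next_class and get_least_likely_class. Below is a minimal NumPy sketch of what such helpers plausibly do; the actual implementations in datasets.datasets_utils may differ.

import numpy as np

def get_next_class(Y):
    # Shift each one-hot label to the next class index, wrapping around.
    num_classes = Y.shape[1]
    next_idx = (np.argmax(Y, axis=1) + 1) % num_classes
    return np.eye(num_classes)[next_idx]

def get_least_likely_class(Y_pred):
    # One-hot label of the class the model considers least likely.
    num_classes = Y_pred.shape[1]
    return np.eye(num_classes)[np.argmin(Y_pred, axis=1)]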
Example #11
0
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset, LFWDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, calculate_mean_confidence, calculate_accuracy
    from utils.parameter_parser import parse_params

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
    elif FLAGS.dataset_name == "LFW":
        dataset = LFWDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.

    keras.backend.set_learning_phase(0)

    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name,
                                           logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['acc'])

    X_train_all, Y_train_all = dataset.get_train_dataset()
    if FLAGS.model_name in [
            'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110'
    ] and FLAGS.dataset_name == 'CIFAR-10':
        # for resnet
        x_train_mean = np.mean(X_train_all, axis=0)
        X_test_all -= x_train_mean

    # 3. Evaluate the trained model.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    if FLAGS.attacks:
        from attacks import load_attack_input

        #attack_string = filter(lambda x: len(x) > 0, FLAGS.attacks.lower())
        attack_string = FLAGS.attacks.lower()

        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        selected_idx = correct_idx[:100]

        X_test_all = load_attack_input(FLAGS.dataset_name, attack_string)
        Y_test_all = Y_test_all[selected_idx]

    # 4. Input denoising defense.

    input_verified = X_test_all

    # Input verifier: append one verified view per configured verifier.
    if FLAGS.input_verifier != '':
        from input_verifier_method import get_inverifier_by_name

        inverifier_names = [
            ele.strip() for ele in FLAGS.input_verifier.split(';')
            if ele.strip() != ''
        ]

        for inverifier_name in inverifier_names:

            inverifier = get_inverifier_by_name(inverifier_name, 'python')
            input_verified = np.append(input_verified,
                                       inverifier(X_test_all),
                                       axis=0)

    # Number of stacked views: the raw input plus one view per verifier.
    num_views = input_verified.shape[0] // X_test_all.shape[0]
    batch_size = X_test_all.shape[0]

    # Sum the predictions over all views, then average them.
    Y_pred = model.predict(input_verified[0:batch_size])
    for i in range(1, num_views):
        batch = input_verified[i * batch_size:(i + 1) * batch_size]
        Y_pred = Y_pred + model.predict(batch)

    Y_pred_inverified = Y_pred / num_views

    from datasets.datasets_utils import calculate_msqueezer_accuracy

    avg = np.zeros((Y_pred_inverified.shape[0], dataset.num_classes))
    for idx in range(Y_pred_inverified.shape[0]):
        if np.max(Y_pred_inverified[idx]) >= 0.6:
            # Confident prediction: keep the averaged probabilities.
            avg[idx] = Y_pred_inverified[idx]
        else:
            # Low confidence: shift the scores so the example is presumably
            # counted as a detection alert by calculate_msqueezer_accuracy.
            avg[idx] = Y_pred_inverified[idx] + 1

    accuracy, _, alert_bad = calculate_msqueezer_accuracy(avg, Y_test_all)

    print(
        "Conf-L1 test accuracy: %.4f (correct predictions: %.4f, "
        "detections: %.4f) with the input verifier layer"
        % (accuracy + alert_bad / Y_pred_inverified.shape[0], accuracy,
           alert_bad / Y_pred_inverified.shape[0]))

    accuracy = calculate_accuracy(Y_pred_inverified, Y_test_all)

    print('Majority Voting Test accuracy %.4f' % (accuracy))
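The prediction loop above amounts to averaging the model's softmax output over the raw input plus one view per input verifier. A condensed sketch of the same idea, assuming a predict function and a stacked views array laid out as in the example:

import numpy as np

def average_over_views(predict_fn, views, batch_size):
    # views stacks the raw inputs first, then one verified view per
    # verifier, each of length batch_size along axis 0.
    num_views = views.shape[0] // batch_size
    preds = [predict_fn(views[i * batch_size:(i + 1) * batch_size])
             for i in range(num_views)]
    return np.mean(preds, axis=0)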
Example #12
0
"""
Created on Fri Apr 24 17:11:35 2020
https://blog.aloni.org/posts/backprop-with-tensorflow/
@author: user
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from datasets import MNISTDataset


mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

data = MNISTDataset(train_images.reshape([-1, 784]), train_labels, 
                    test_images.reshape([-1, 784]), test_labels,
                    batch_size=128)

def experimentOne():
    training_steps = 1000
    lr = 0.05
    W1 = tf.Variable(np.random.rand(784, 10), dtype=tf.float32)
    b1 = tf.Variable(np.random.rand(1, 10), dtype=tf.float32)
    train_stats = {}
    train_stats["acc"] = []
    train_stats["steps"] = []
    train_stats["loss"] = []
    for step in range(training_steps):
        img_batch, lbl_batch = data.next_batch()
        with tf.GradientTape() as tape:
            logits = tf.matmul(img_batch, W1) + b1
            # (Completion of the truncated snippet; assumes next_batch
            # yields float32 images and integer class labels.)
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=lbl_batch, logits=logits))
        # Manual gradient-descent update on the single linear layer.
        grad_W1, grad_b1 = tape.gradient(loss, [W1, b1])
        W1.assign_sub(lr * grad_W1)
        b1.assign_sub(lr * grad_b1)
        # Record training statistics for later inspection.
        acc = np.mean(np.argmax(logits.numpy(), axis=1) == lbl_batch)
        train_stats["acc"].append(acc)
        train_stats["steps"].append(step)
        train_stats["loss"].append(float(loss))
    return train_stats
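The snippet imports matplotlib but is cut off before using it; a plausible continuation plots the statistics gathered by experimentOne():

stats = experimentOne()
plt.plot(stats["steps"], stats["loss"], label="loss")
plt.plot(stats["steps"], stats["acc"], label="accuracy")
plt.xlabel("training step")
plt.legend()
plt.show()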
Example #13
0
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset, LFWDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples2, calculate_mean_confidence, calculate_accuracy
    from utils.parameter_parser import parse_params

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
    elif FLAGS.dataset_name == "LFW":
        dataset = LFWDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # Label selection: keep only test examples of class FLAGS.label_index.
    label = np.asarray([0] * Y_test_all.shape[1])
    label[FLAGS.label_index] = 1
    filter_indices = [i for i in range(len(Y_test_all))
                      if np.array_equal(Y_test_all[i], label)]
    print("Shapes before label filtering:", X_test_all.shape, Y_test_all.shape)
    X_test_all = np.take(X_test_all, filter_indices, 0)
    Y_test_all = np.take(Y_test_all, filter_indices, 0)
    print("Shapes after label filtering:", X_test_all.shape, Y_test_all.shape)

    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, dataset.image_size, dataset.image_size, dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False, input_range_type=1)
        model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class

    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                if FLAGS.random_image != 0:
                    # Shuffle the correctly-classified examples with a
                    # reproducible seed before taking the first nb_examples.
                    np.random.seed(FLAGS.random_image)
                    np.random.shuffle(correct_idx)
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples / Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" % selected_example_idx_ranges)
    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[selected_idx], Y_pred_all[selected_idx]

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' % (mean_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all

    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    #task_id = "%s_%d_%s_%s" % \
     #   (task['dataset_name'], task['test_set_selected_length'], task['test_set_selected_idx_hash'][:5], task['model_name'])

    task_id = "%s_%s" % \
           (task['dataset_name'], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if os.path.exists(FLAGS.result_folder):
        print("Removing existing results folder: %s" % FLAGS.result_folder)
        shutil.rmtree(FLAGS.result_folder)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor2
    save_task_descriptor2(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples
    from utils.squeeze import reduce_precision_py

    #attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    # Use the raw attack string (not bytes) so it formats cleanly in filenames.
    attack_string_hash = FLAGS.attacks

    from datasets.datasets_utils import get_next_class, get_most_likely_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_most = get_most_likely_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    attack_string_list = filter(lambda x: len(x) > 0, FLAGS.attacks.lower().split(';'))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    # Start each run from clean output folders.
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if os.path.isdir(folder):
            shutil.rmtree(folder)
        os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder, "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'most':
                Y_test_target = Y_test_target_most
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            elif targeted is False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)

        X_test_adv, aux_info = maybe_generate_adv_examples(sess, model, x, y, X_test, Y_test_target, attack_name, attack_params, use_cache=x_adv_fpath, verbose=FLAGS.verbose, attack_log_fpath=attack_log_fpath)

        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)

        X_test_adv_list.append(X_test_adv)

        if isinstance(aux_info, float):
            duration = aux_info
        else:
            duration = aux_info['duration']

        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        # Y_test_adv_pred = model.predict(X_test_adv)
        # predictions_fpath = os.path.join(predictions_folder, "%s.npy"% attack_string)
        # np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)

        rec = evaluate_adversarial_examples2(X_test, Y_test, X_test_adv_discret, Y_test_target.copy(), targeted, Y_test_adv_discret_pred, attack_string)
        # Collect per-example confidences; NaN marks examples still
        # predicted as the original label (i.e. the attack failed).
        confidences = rec['confidence_scores']
        preds = np.argmax(Y_test_adv_discret_pred, axis=1)
        k = 0
        confidence_scores = []
        preds_after_attack = []
        mean = 0.0
        for pred in preds:
            preds_after_attack.append(str(pred))
            if pred == FLAGS.label_index:
                confidence_scores.append(str(float("nan")))
            else:
                try:
                    confidence_scores.append(str(confidences[k]))
                    mean += float(confidences[k])
                except (IndexError, ValueError):
                    confidence_scores.append(str(float("nan")))
                k += 1
        mean /= len(preds)
        rec['confidence_scores'] = ",".join(confidence_scores)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['original_label_index'] = FLAGS.label_index
        rec['random'] = True if FLAGS.random_image != 0 else False
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = True
        rec['prediction_after_attack'] = ",".join(preds_after_attack)
        rec['number_of_images'] = FLAGS.nb_examples
        rec['mean_confidence'] = mean
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(FLAGS.result_folder,
                                                "evaluation.csv")
    fieldnames = [
        'dataset_name', 'model_name', 'attack_string', 'original_label_index',
        'random', 'duration_per_sample', 'discretization', 'success_rate',
        'mean_confidence', 'confidence_scores', 'mean_l2_dist', 'mean_li_dist',
        'mean_l0_dist_value', 'mean_l0_dist_pixel', 'prediction_after_attack',
        'number_of_images'
    ]
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows2
        if FLAGS.test_mode or FLAGS.balance_sampling:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            #selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)
            #selected_idx_vis = selected_idx
            selected_idx_vis = list(range(FLAGS.nb_examples))
        legitimate_examples = X_test[selected_idx_vis]

        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)
        img_fpath = os.path.join(FLAGS.result_folder, '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        show_imgs_in_rows2(rows, dataset.num_channels, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)
        print(Y_test_adv_discretized_pred_list)

        """rows = [legitimate_examples]
Example #14
0
                        help='number of gradients to gather at each iteration')
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    # this is only a simple test case
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    world_size = comm.Get_size()

    args = add_fit_args(argparse.ArgumentParser(description='PyTorch MNIST Single Machine Test'))

    # fetch dataset
    if args.dataset == "MNIST":
        mnist_data = mnist.read_data_sets(train_dir='./mnist_data', reshape=True)
        train_set = MNISTDataset(dataset=mnist_data.train, transform=transforms.ToTensor())
    elif args.dataset == "Cifar10":
        cifar10_data = cifar10.read_data_sets(padding_size=0, reshape=True)
        train_set = Cifar10Dataset(dataset=cifar10_data.train, transform=transforms.ToTensor())

    kwargs_master = {'batch_size':args.batch_size, 'learning_rate':args.lr, 'max_epochs':args.epochs, 'momentum':args.momentum, 'network':args.network,
                'comm_method':args.comm_type, 'kill_threshold': args.num_aggregate}

    kwargs_worker = {'batch_size':args.batch_size, 'learning_rate':args.lr, 'max_epochs':args.epochs, 'momentum':args.momentum, 'network':args.network,
                'comm_method':args.comm_type}

    if rank == 0:
        master_fc_nn = SyncReplicasMaster_NN(comm=comm, **kwargs_master)
        master_fc_nn.build_model()
        print("I am the master: the world size is {}, cur step: {}".format(master_fc_nn.world_size, master_fc_nn.cur_step))
        master_fc_nn.train()
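Only the rank-0 (master) branch survives in this snippet; the worker branch is cut off. A minimal, hypothetical mpi4py skeleton of the same dispatch pattern (SyncReplicasMaster_NN and its worker counterpart are project-specific and not shown here):

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    # Rank 0 plays the parameter-server master.
    print("master: world size = %d" % comm.Get_size())
else:
    # Every other rank would run a worker training loop.
    print("worker %d ready" % rank)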
Example #15
0
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, calculate_mean_confidence, calculate_accuracy

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()


    # 1. Load a dataset.
    print ("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()


    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, dataset.image_size, dataset.image_size, dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False, input_range_type=1)
        model.compile(loss='categorical_crossentropy',optimizer='sgd', metrics=['acc'])


    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print ("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))


    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_example_id_each_class
    # Filter out the misclassified examples.
    correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
    if FLAGS.test_mode:
        # Only select the first example of each class.
        correct_and_selected_idx = get_first_example_id_each_class(Y_test_all[correct_idx])
        selected_idx = [ correct_idx[i] for i in correct_and_selected_idx ]
    else:
        selected_idx = correct_idx[:FLAGS.nb_examples]

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print ( "Selected %d examples." % len(selected_idx))
    print ( "Selected index in test set (sorted): %s" % selected_example_idx_ranges )

    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[selected_idx], Y_pred_all[selected_idx]

    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' % (mean_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all

    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
            (task['dataset_name'], task['test_set_selected_length'], task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])


    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples, parse_attack_string
    from defenses.feature_squeezing.squeeze import reduce_precision_np
    attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from attacks import get_next_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []

    # Materialize as a list: under Python 3, filter() returns a one-shot
    # iterator, and the list is reused by the defense and detection steps.
    attack_string_list = list(filter(lambda x: len(x) > 0, FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder, "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_attack_string(attack_string)
        print ( "\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            else:
                # Unrecognized or false value: fall back to an untargeted
                # attack so that Y_test_target is always defined.
                targeted = False
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)

        X_test_adv, aux_info = maybe_generate_adv_examples(sess, model, x, y, X_test, Y_test_target, attack_name, attack_params, use_cache = x_adv_fpath, verbose=FLAGS.verbose, attack_log_fpath=attack_log_fpath)
        X_test_adv_list.append(X_test_adv)

        if isinstance(aux_info, float):
            duration = aux_info
        else:
            print (aux_info)
            duration = aux_info['duration']

        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model.predict(X_test_adv)
        predictions_fpath = os.path.join(predictions_folder, "%s.npy"% attack_string)
        np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1. Evaluate the quality of adversarial examples

        print ("\n---Attack: %s" % attack_string)
        rec = evaluate_adversarial_examples(X_test, X_test_adv, Y_test_target.copy(), targeted, Y_test_adv_pred)
        print ("Duration per sample: %.1fs" % dur_per_sample)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = False
        to_csv.append(rec)

        # 5.2 Adversarial examples being discretized to uint8.
        print ("\n---Attack (uint8): %s" % attack_string)
        X_test_adv_discret = reduce_precision_np(X_test_adv, 256)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        rec = evaluate_adversarial_examples(X_test, X_test_adv_discret, Y_test_target.copy(), targeted, Y_test_adv_discret_pred)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = True
        to_csv.append(rec)


    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(FLAGS.result_folder, 
            "%s_attacks_%s_evaluation.csv" % \
            (task_id, attack_string_hash))
    fieldnames = ['dataset_name', 'model_name', 'attack_string', 'duration_per_sample', 'discretization', 'success_rate', 'mean_confidence', 'mean_l2_dist', 'mean_li_dist', 'mean_l0_dist_value', 'mean_l0_dist_pixel']
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)


    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows
        if FLAGS.test_mode:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            selected_idx_vis = get_first_example_id_each_class(Y_test)
        legitimate_examples = X_test[selected_idx_vis]

        rows = [legitimate_examples]
        rows += map(lambda x:x[selected_idx_vis], X_test_adv_list)

        img_fpath = os.path.join(FLAGS.result_folder, '%s_attacks_%s_examples.png' % (task_id, attack_string_hash) )
        show_imgs_in_rows(rows, img_fpath)
        print ('\n===Adversarial image examples are saved in ', img_fpath)

        # TODO: output the prediction and confidence for each example, both legitimate and adversarial.


    # 6. Evaluate defense techniques.
    if FLAGS.defense == 'feature_squeezing':
        """
        Test the accuracy with feature squeezing filters.
        """
        from defenses.feature_squeezing.robustness import calculate_squeezed_accuracy_new

        # Calculate the accuracy of legitimate examples for only once.
        csv_fpath = "%s_%s_robustness.csv" % (task_id, attack_string_hash)
        print ("Saving robustness test results at %s" % csv_fpath)
        csv_fpath = os.path.join(FLAGS.result_folder, csv_fpath)
        calculate_squeezed_accuracy_new(model, Y_test, X_test, attack_string_list, X_test_adv_list, csv_fpath)


    # 7. Detection experiment. 
    # All data should be discretized to uint8.
    X_test_adv_discretized_list = [ reduce_precision_np(X_test_adv, 256) for X_test_adv in X_test_adv_list]
    del X_test_adv_list

    if FLAGS.detection == 'feature_squeezing':
        from utils.detection import evalulate_detection_test, get_detection_train_test_set

        # 7.1 Prepare the dataset for detection.
        X_detect_train, Y_detect_train, X_detect_test, Y_detect_test, test_idx, failed_adv_idx = \
                    get_detection_train_test_set(X_test_all, Y_test, X_test_adv_discretized_list, predict_func=model.predict)

        # 7.2 Enumerate all specified detection methods.
        # Take Feature Squeezing as an example.

        csv_fname = "%s_attacks_%s_detection_two_filters_%s_raw_adv.csv" % (task_id, attack_string_hash, FLAGS.detection)
        detection_csv_fpath = os.path.join(FLAGS.result_folder, csv_fname)
        to_csv = []

        from defenses.feature_squeezing.detection import FeatureSqueezingDetector
        from sklearn.metrics import roc_curve, auc
        fsd = FeatureSqueezingDetector(model, task_id, attack_string_hash)

        # TODO: Automatically get the suitable squeezers through robustness test with legitimate examples.
        # squeezers_name = fsd.select_squeezers(X_test, Y_test, accuracy_preserved=0.9)

        if FLAGS.dataset_name == "MNIST":
            squeezers_name = ['median_smoothing_2', 'median_smoothing_3', 'binary_filter']
        elif FLAGS.dataset_name == "CIFAR-10":
            squeezers_name = ["bit_depth_6", 'median_smoothing_1_2', 'median_smoothing_2_1','median_smoothing_2']
        elif FLAGS.dataset_name == "ImageNet":
            squeezers_name = ["bit_depth_5", 'median_smoothing_1_2', 'median_smoothing_2_1','median_smoothing_2']

        # best_metrics = fsd.view_adv_propagation(X_test, X_test_adv_list[0], squeezers_name)
        # best_metrics = [[len(model.layers)-1, 'none', 'kl_f'], [len(model.layers)-1, 'none', 'l1'], [len(model.layers)-1, 'none', 'l2'], \
                        # [len(model.layers)-1, 'unit_norm', 'l1'], [len(model.layers)-1, 'unit_norm', 'l2']]
        best_metrics = [[len(model.layers)-1, 'none', 'l1']]

        for layer_id, normalizer_name, metric_name in best_metrics:
            fsd.set_config(layer_id, normalizer_name, metric_name, squeezers_name)
            print ("===Detection config: Layer-%d, Metric-%s, Norm-%s" % (layer_id, metric_name, normalizer_name))

            csv_fpath = "%s_distances_%s_%s_layer_%d.csv" % (task_id, metric_name, normalizer_name, layer_id)
            csv_fpath = os.path.join(FLAGS.result_folder, csv_fpath)

            fsd.output_distance_csv([X_test_all] + X_test_adv_discretized_list, ['legitimate'] + attack_string_list, csv_fpath)

            # continue

            threshold = fsd.train(X_detect_train, Y_detect_train)
            Y_detect_pred, distances = fsd.test(X_detect_test)

            accuracy, tpr, fpr = evalulate_detection_test(Y_detect_test, Y_detect_pred)
            fprs, tprs, thresholds = roc_curve(Y_detect_test, distances)
            roc_auc = auc(fprs, tprs)

            print ("ROC-AUC: %.2f, Accuracy: %.2f, TPR: %.2f, FPR: %.2f, Threshold: %.2f." % (roc_auc, accuracy, tpr, fpr, threshold))

            ret = {}
            ret['threshold'] = threshold
            ret['accuracy'] = accuracy
            ret['fpr'] = fpr
            ret['tpr'] = tpr
            ret['roc_auc'] = roc_auc

            # index of false negatives
            fn_idx = np.where((Y_detect_test == True) & (Y_detect_pred == False))
            # index in Y_detect.
            fn_idx_Y_test = np.array(test_idx)[fn_idx]

            nb_failed_as_negative = len(fn_idx_Y_test) - len(set(fn_idx_Y_test) - set(failed_adv_idx))
            print ("%d/%d failed adv. examples in false negatives." % (nb_failed_as_negative, len(fn_idx_Y_test)))

            ret['fn'] = len(fn_idx_Y_test)
            ret['failed_adv_as_fn'] = nb_failed_as_negative

            tp_idx = np.where((Y_detect_test == True) & (Y_detect_pred == True))
            tp_idx_Y_test = np.array(test_idx)[tp_idx]
            nb_failed_as_positive = len(tp_idx_Y_test) - len(set(tp_idx_Y_test) - set(failed_adv_idx))
            print ("%d/%d failed adv. examples in true positives." % (nb_failed_as_positive, len(tp_idx_Y_test)))

            ret['layer_id'] = layer_id
            ret['normalizer'] = normalizer_name
            ret['distance_metric'] = metric_name
            to_csv.append(ret)

        fieldnames = ['layer_id', 'distance_metric', 'normalizer', 'roc_auc', 'accuracy', 'tpr', 'fpr', 'threshold', 'failed_adv_as_fn', 'fn']
        write_to_csv(to_csv, detection_csv_fpath, fieldnames)
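evalulate_detection_test is not shown, but from how its outputs are used it presumably reduces boolean detection labels to accuracy, TPR, and FPR, roughly as in this sketch:

import numpy as np

def detection_metrics(y_true, y_pred):
    # y_true: True for adversarial inputs; y_pred: True when flagged.
    y_true = np.asarray(y_true, dtype=bool)
    y_pred = np.asarray(y_pred, dtype=bool)
    accuracy = float(np.mean(y_true == y_pred))
    tpr = float(np.mean(y_pred[y_true]))    # detected adversarial examples
    fpr = float(np.mean(y_pred[~y_true]))   # false alarms on legitimate inputs
    return accuracy, tpr, fpr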