def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL,
                     ckpt_dir='None',
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param architecture: classifier architecture, 'ConvNet' or 'ResNet'
    :param load_model: if True, restore a pretrained checkpoint instead of
                       training from scratch
    :param ckpt_dir: directory containing pretrained checkpoints (the string
                     'None' means use ./models)
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: base number of convolutional filters
    :param num_threads: number of intra-op parallelism threads for the TF
                        session
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Seed the TF and NumPy RNGs from the current time (runs are therefore
    # not reproducible)
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')

    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        if x_train.shape[1] == 3:
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
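        # Convert integer labels to one-hot encoding if necessary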
        if len(y_train.shape) == 1:
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Set training, evaluation and attack parameters
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  #1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }

    # Madry et al. don't divide by 255, so rescale the data to [0, 255]
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train), np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))

    print('clip_min: {}, clip_max: {} -- make sure these match the value range '
          'the classifier was pretrained with!'.format(pgd_params['clip_min'],
                                                       pgd_params['clip_max']))

    rng = np.random.RandomState()  # [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                predictor=None,
                x_adv=None):
        if predictor is None:
            acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        else:
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()
            if FLAGS.save_debug_dict:
                debug_dict['x_set'] = x_set
                debug_dict['y_set'] = y_set
                ddfn = 'logs/debug_dict_{}.pkl'.format(
                    'adv' if is_adv else 'clean')
                if not os.path.exists(ddfn):
                    with open(ddfn, 'wb') as f:
                        pickle.dump(debug_dict, f)
                debug_dict.clear()
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
            if is_adv is not None:
                label = 'test_acc_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(label, acc)
                if predictor is not None:
                    detect = np.equal(p_det, is_adv).mean()
                    label = 'test_det_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    print(label, detect)
                    swriter.add_scalar(label, detect)
                    label = 'test_dac_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    swriter.add_scalar(
                        label,
                        np.equal(p_set,
                                 y_set[:len(p_set)].argmax(-1))[np.equal(
                                     p_det, is_adv)].mean())

        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1',
                                          nb_classes,
                                          nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            if ckpt_dir != 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

            saver = tf.train.Saver(var_list=dict(
                (v.name.split('/', 1)[1].split(':')[0], v)
                for v in tf.global_variables()))
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

            initialize_uninitialized_global_variables(sess)

        else:

            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

            train(sess,
                  loss,
                  None,
                  None,
                  dataset_train=dataset_train,
                  dataset_size=dataset_size,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params())

        # Walk back from the logits to the final MatMul op to recover the
        # latent features and the classifier weight matrix
        logits_op = preds.op
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect],
                    y_train[:FLAGS.n_collect],
                    x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:

                def _predictor():
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
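            # Prime the predictor coroutine so it can receive batches via send()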
            next(predictor)
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds,
                                x_test,
                                y_test,
                                'clean_train_clean_eval',
                                False,
                                predictor=predictor)

        # Initialize the PGD attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps
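            # 'mean' attack: run PGD from several uniformly-noised copies of
            # each image, clip the results back into the eps-ball around the
            # clean copies, and average the adversarial copies.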

            def _attack_mean(x):
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            adv_x = tf.reduce_mean(adv_x, 1)

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            for ds, ys, name in ((x_train, y_train, 'train'),
                                 (x_test, y_test, 'test')):
                train_batches = math.ceil(len(ds) / FLAGS.batch_size)
                train_pgd = np.concatenate([
                    sess.run(adv_x, {
                        x:
                        ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size]
                    }) for b in tqdm.trange(train_batches)
                ])
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), ys)
                train_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), train_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv,
                              x_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor,
                              x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds,
                              pgd_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')

    # NOTE: the script stops here; the adversarial-training phase below is
    # never reached.
    exit(0)
    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2',
                                       nb_classes,
                                       nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        model2 = ResNet(scope='ResNet2')  # separate variable scope from the first ResNet
    else:
        raise Exception('Specify valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir != 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

        assert ckpt_path and tf_model_load(
            sess, file_path=ckpt_path), '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

        initialize_uninitialized_global_variables(sess)

    else:

        def evaluate2():
            # Accuracy of adversarially trained model on legitimate test inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess,
              loss2,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate2,
              args=train_params,
              rng=rng,
              var_list=model2.get_params())

    # Evaluate model
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
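
# A hypothetical entry point (a sketch, not part of the original script): the
# cleverhans tutorials typically wire the tutorial function up to tf.app.run so
# that the absl/TF flags referenced above (FLAGS.eps, FLAGS.attack, ...) are
# parsed before the tutorial starts.
def main(argv=None):
    cifar10_tutorial()


if __name__ == '__main__':
    tf.app.run()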
Example #2
def craft_one_type(sess,
                   model,
                   X,
                   Y,
                   dataset,
                   attack,
                   batch_size,
                   log_path=None,
                   fp_path=None,
                   model_logits=None):
    """
    TODO
    :param sess:
    :param model:
    :param X:
    :param Y:
    :param dataset:
    :param attack:
    :param batch_size:
    :return:
    """
    print("entered")
    if log_path is not None:
        PATH_DATA = log_path

    if attack == 'fgsm':
        # FGSM attack
        print('Crafting fgsm adversarial samples...')
        X_adv = fast_gradient_sign_method(sess,
                                          model,
                                          X,
                                          Y,
                                          eps=ATTACK_PARAMS[dataset]['eps'],
                                          clip_min=CLIP_MIN,
                                          clip_max=CLIP_MAX,
                                          batch_size=batch_size)
    elif attack == 'adapt-fgsm':
        # Adaptive FGSM attack
        print('Crafting %s adversarial samples...' % attack)

        X_adv = adaptive_fast_gradient_sign_method(
            sess,
            model,
            X,
            Y,
            eps=ATTACK_PARAMS[dataset]['eps'],
            clip_min=CLIP_MIN,
            clip_max=CLIP_MAX,
            batch_size=batch_size,
            log_dir=fp_path,
            model_logits=model_logits,
            dataset=dataset)
    elif attack == 'adapt-bim-b':
        # Adaptive BIM attack
        print('Crafting %s adversarial samples...' % attack)
        X_adv = adaptive_basic_iterative_method(
            sess,
            model,
            X,
            Y,
            eps=ATTACK_PARAMS[dataset]['eps'],
            eps_iter=ATTACK_PARAMS[dataset]['eps_iter'],
            clip_min=CLIP_MIN,
            clip_max=CLIP_MAX,
            batch_size=batch_size,
            log_dir=fp_path,
            model_logits=model_logits,
            dataset=dataset)
    elif attack in ['bim-a', 'bim-b']:
        # BIM attack
        print('Crafting %s adversarial samples...' % attack)
        its, results = basic_iterative_method(
            sess,
            model,
            X,
            Y,
            eps=ATTACK_PARAMS[dataset]['eps'],
            eps_iter=ATTACK_PARAMS[dataset]['eps_iter'],
            clip_min=CLIP_MIN,
            clip_max=CLIP_MAX,
            batch_size=batch_size)
        if attack == 'bim-a':
            # BIM-A
            # For each sample, select the time step where that sample first
            # became misclassified
            X_adv = np.asarray([results[its[i], i] for i in range(len(Y))])
        else:
            # BIM-B
            # For each sample, select the very last time step
            X_adv = results[-1]
    elif attack == 'jsma':
        # JSMA attack
        print('Crafting jsma adversarial samples. This may take > 5 hours')
        X_adv = saliency_map_method(sess,
                                    model,
                                    X,
                                    Y,
                                    theta=1,
                                    gamma=0.1,
                                    clip_min=CLIP_MIN,
                                    clip_max=CLIP_MAX)
    elif attack == 'cw-l2':
        # C&W attack
        print(
            'Crafting %s examples. This takes > 5 hours due to internal grid search'
            % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']
        cw_attack = CarliniL2(sess,
                              model,
                              image_size,
                              num_channels,
                              num_labels,
                              batch_size=batch_size)
        X_adv = cw_attack.attack(X, Y)
    elif attack == 'cw-fp':
        # C&W attack to break LID detector
        print(
            'Crafting %s examples. This takes > 5 hours due to internal grid search'
            % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']
        cw_attack = CarliniFP_2vars(sess,
                                    model,
                                    image_size,
                                    num_channels,
                                    num_labels,
                                    batch_size=batch_size,
                                    fp_dir=fp_path)
        X_adv = cw_attack.attack(X, Y)

    elif attack == 'spsa':
        binary_steps = 1
        batch_shape = X.shape
        X_input = tf.placeholder(tf.float32, shape=(1, ) + batch_shape[1:])
        Y_label = tf.placeholder(tf.int32, shape=(1, ))
        alpha = tf.placeholder(tf.float32, shape=(1, ))

        num_samples = np.shape(X)[0]
        # X = (X - np.argmin(X))/(np.argmax(X)-np.argmin(X))
        _min = np.min(X)
        _max = np.max(X)
        print(_max, _min)
        print(tf.trainable_variables())
        filters = sess.run('conv1/kernel:0')
        biases = 0.0 * sess.run('conv1/bias:0')
        shift_model = Sequential()
        if (dataset == 'mnist'):
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(1, 28, 28)))
        else:
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(3, 32, 32)))

        X_input_2 = tf.placeholder(tf.float32,
                                   shape=(None, ) + batch_shape[1:])

        correction_term = shift_model(X_input_2)
        if (dataset == 'mnist'):
            X_correction = -0.5 * np.ones(
                (1, 1, 28, 28)
            )  # We will shift the image up by 0.5, so this is the correction
        else:
            X_correction = -0.5 * np.ones(
                (1, 3, 32, 32)
            )  # We will shift the image up by 0.5, so this is the correction

        # for PGD
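        # Bias-correction trick: the inputs are shifted up by 0.5 below, which
        # changes each conv1 output channel by 0.5 * sum(kernel). Running the
        # zero-bias copy of conv1 on a constant -0.5 image produces exactly the
        # opposite per-channel offset, so adding it to the original biases
        # cancels the effect of the input shift.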

        shift_model.layers[0].set_weights([filters, biases])
        bias_correction_terms = (sess.run(correction_term,
                                          feed_dict={X_input_2: X_correction}))
        for i in range(32):
            biases[i] = bias_correction_terms[0, i, 0, 0]
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        original_biases = model.layers[0].get_weights()[1]
        original_weights = model.layers[0].get_weights()[0]
        model.layers[0].set_weights(
            [original_weights, original_biases + biases])
        #Correct model for input shift

        X = X + 0.5  #shift input to make it >=0
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        # check accuracy post correction of input and model
        print('Crafting %s examples. Using Cleverhans' % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']

        from cleverhans.utils_keras import KerasModelWrapper
        wrapped_model = KerasModelWrapper(model)

        if dataset == "mnist":
            wrapped_model.nb_classes = 10
        elif dataset == "cifar":
            wrapped_model.nb_classes = 10
        else:
            wrapped_model.nb_classes = 10

        real_batch_size = X.shape[0]
        X_adv = None

        spsa = SPSA(wrapped_model, back='tf', sess=sess)
        spsa_params = {
            "epsilon": ATTACK_PARAMS[dataset]['eps'],
            'num_steps': 100,
            'spsa_iters': 1,
            'early_stop_loss_threshold': None,
            'is_targeted': False,
            'is_debug': False
        }
        X_adv_spsa = spsa.generate(X_input,
                                   alpha=alpha,
                                   y=Y_label,
                                   fp_path=fp_path,
                                   **spsa_params)

        for i in range(num_samples):

            # rescale to format TF wants

            #X_i_norm = (X[i] - _min)/(_max-_min)

            X_i_norm = X[i]
            # Run attack
            best_res = None
            ALPHA = np.ones(1) * 0.1
            lb = 1.0e-2
            ub = 1.0e2
            for j in range(binary_steps):
                res = sess.run(X_adv_spsa,
                               feed_dict={
                                   X_input: np.expand_dims(X_i_norm, axis=0),
                                   Y_label: np.array([np.argmax(Y[i])]),
                                   alpha: ALPHA
                               })
                if (dataset == 'mnist'):
                    X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28])
                else:
                    X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32])
                pred = model(X_place)
                model_op = sess.run(pred, feed_dict={X_place: res})

                if (not np.argmax(model_op) == np.argmax(Y[i, :])):
                    lb = ALPHA[0]
                else:
                    ub = ALPHA[0]
                ALPHA[0] = 0.5 * (lb + ub)
                print(ALPHA)
                if (best_res is None):
                    best_res = res
                else:
                    if (not np.argmax(model_op) == np.argmax(Y[i, :])):
                        best_res = res
                        pass

            # Rescale result back to our scale

            if (i == 0):
                X_adv = best_res
            else:
                X_adv = np.concatenate((X_adv, best_res), axis=0)

        _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the adversarial test set: %0.2f%%" %
              (100.0 * acc))
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))

        #Revert model to original
        model.layers[0].set_weights([original_weights, original_biases])
        #Revert adv shift
        X_adv = X_adv - 0.5
        X = X - 0.5  #Not used but just for logging purposes
    elif attack == 'adapt-pgd':
        binary_steps = 1
        rand_starts = 2
        batch_shape = X.shape
        X_input = tf.placeholder(tf.float32, shape=(1, ) + batch_shape[1:])
        Y_label = tf.placeholder(tf.int32, shape=(1, ))
        alpha = tf.placeholder(tf.float32, shape=(1, ))

        num_samples = np.shape(X)[0]
        # X = (X - np.argmin(X))/(np.argmax(X)-np.argmin(X))
        _min = np.min(X)
        _max = np.max(X)
        print(_max, _min)
        print(tf.trainable_variables())
        filters = sess.run('conv1/kernel:0')
        biases = 0.0 * sess.run('conv1/bias:0')
        shift_model = Sequential()
        if (dataset == 'mnist'):
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(1, 28, 28)))
        else:
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(3, 32, 32)))

        X_input_2 = tf.placeholder(tf.float32,
                                   shape=(None, ) + batch_shape[1:])

        correction_term = shift_model(X_input_2)
        if (dataset == 'mnist'):
            X_correction = -0.5 * np.ones(
                (1, 1, 28, 28)
            )  # We will shift the image up by 0.5, so this is the correction
        else:
            X_correction = -0.5 * np.ones(
                (1, 3, 32, 32)
            )  # We will shift the image up by 0.5, so this is the correction

        # for PGD

        shift_model.layers[0].set_weights([filters, biases])
        bias_correction_terms = (sess.run(correction_term,
                                          feed_dict={X_input_2: X_correction}))
        for i in range(32):
            biases[i] = bias_correction_terms[0, i, 0, 0]
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        original_biases = model.layers[0].get_weights()[1]
        original_weights = model.layers[0].get_weights()[0]
        model.layers[0].set_weights(
            [original_weights, original_biases + biases])
        #Correct model for input shift

        X = X + 0.5  #shift input to make it >=0

        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        # check accuracy post correction of input and model
        print('Crafting %s examples. Using Cleverhans' % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']

        from cleverhans.utils_keras import KerasModelWrapper
        wrapped_model = KerasModelWrapper(model)

        if dataset == "mnist":
            wrapped_model.nb_classes = 10
        elif dataset == "cifar":
            wrapped_model.nb_classes = 10
        else:
            wrapped_model.nb_classes = 10

        real_batch_size = X.shape[0]
        X_adv = None

        pgd = MadryEtAl(wrapped_model, back='tf', sess=sess)
        X_adv_pgd, adv_loss_fp = pgd.generate(X_input,
                                              eps=0.3,
                                              eps_iter=0.02,
                                              clip_min=0.0,
                                              clip_max=1.0,
                                              nb_iter=20,
                                              rand_init=True,
                                              fp_path=fp_path,
                                              alpha=alpha)

        for i in range(num_samples):
            # rescale to format TF wants

            #X_i_norm = (X[i] - _min)/(_max-_min)

            X_i_norm = X[i]
            # Run attack
            best_res = None
            best_res_loss = 1000000.0
            ALPHA = np.ones(1) * 0.1
            lb = 1.0e-2
            ub = 1.0e2
            for j in range(binary_steps):
                bin_flag = 0
                for jj in range(rand_starts):

                    [res, res_loss] = sess.run(
                        [X_adv_pgd, adv_loss_fp],
                        feed_dict={
                            X_input: np.expand_dims(X[i], axis=0),
                            Y_label: np.array([np.argmax(Y[i])]),
                            alpha: ALPHA
                        })

                    if (dataset == 'mnist'):
                        X_place = tf.placeholder(tf.float32,
                                                 shape=[1, 1, 28, 28])
                    else:
                        X_place = tf.placeholder(tf.float32,
                                                 shape=[1, 3, 32, 32])

                    pred = model(X_place)
                    model_op = sess.run(pred, feed_dict={X_place: res})

                    if (best_res is None):
                        best_res = res
                    else:
                        if ((not np.argmax(model_op) == np.argmax(Y[i, :]))
                                and res_loss < best_res_loss):
                            best_res = res
                            best_res_loss = res_loss
                            bin_flag = 1
                            pass
                if (bin_flag == 1):
                    lb = ALPHA[0]
                else:
                    ub = ALPHA[0]
                ALPHA[0] = 0.5 * (lb + ub)
                print(ALPHA)
            # Rescale result back to our scale

            if (i == 0):
                X_adv = best_res
            else:
                X_adv = np.concatenate((X_adv, best_res), axis=0)

        _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the adversarial test set: %0.2f%%" %
              (100.0 * acc))
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))

        #Revert model to original
        model.layers[0].set_weights([original_weights, original_biases])
        #Revert adv shift
        X_adv = X_adv - 0.5
        X = X - 0.5  #Not used but just for logging purposes

        pass

    if ("adapt" in attack or "fp" in attack or "spsa" in attack):
        [m, _, _, _] = (np.shape(X_adv))
        cropped_X_adv = []
        cropped_Y = []
        cropped_X = []
        if (dataset == 'mnist'):
            X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28])
            pred = model(X_place)
        else:
            X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32])
            pred = model(X_place)
        for i in range(m):
            logits_op = sess.run(pred,
                                 feed_dict={X_place: X_adv[i:i + 1, :, :, :]})
            if (not np.argmax(logits_op) == np.argmax(Y[i, :])):
                cropped_Y.append(Y[i, :])
                cropped_X_adv.append(X_adv[i, :, :, :])
                cropped_X.append(X[i, :, :, :])
        X_adv = np.array(cropped_X_adv)
        X = np.array(cropped_X)
        Y = np.array(cropped_Y)

        f = open(
            os.path.join(log_path, 'Random_Test_%s_%s.p' % (dataset, attack)),
            'wb')

        pickle.dump({"adv_input": X, "adv_labels": Y}, f)
        f.close()

    #np.save(os.path.join(PATH_DATA, 'Adv_%s_%s.npy' % (dataset, attack)), X_adv)
    f = open(os.path.join(log_path, 'Adv_%s_%s.p' % (dataset, attack)), 'wb')

    pickle.dump({"adv_input": X_adv, "adv_labels": Y}, f)
    f.close()
    _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
    print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
    l2_diff = np.linalg.norm(X_adv.reshape((len(X), -1)) - X.reshape(
        (len(X), -1)),
                             axis=1).mean()
    print("Average L-2 perturbation size of the %s attack: %0.2f" %
          (attack, l2_diff))
    if (("adapt" in attack) or ("cw-fp" in attack)):
        return (X, X_adv, Y)
    else:
        print(Y.shape)
        return (X_adv, Y)
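
# A small, self-contained sketch (hypothetical helper, not part of the original
# code) of the average-L2-perturbation metric reported above: the mean
# Euclidean distance between clean and adversarial examples.
import numpy as np


def mean_l2_perturbation(X_clean, X_adv):
    """Average L2 norm of the per-example perturbation."""
    diffs = X_adv.reshape((len(X_clean), -1)) - X_clean.reshape((len(X_clean), -1))
    return np.linalg.norm(diffs, axis=1).mean()


# Example with a tiny random batch perturbed by uniform noise of magnitude 0.03:
# rng = np.random.RandomState(0)
# X_clean = rng.rand(8, 3, 32, 32).astype('float32')
# X_adv = X_clean + rng.uniform(-0.03, 0.03, X_clean.shape).astype('float32')
# print('Average L2 perturbation: %0.4f' % mean_l2_perturbation(X_clean, X_adv))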
Example #3
def main(argv=None):
    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter
    
    #### EMPIR extra flags
    lowprecision = FLAGS.lowprecision
    abits = FLAGS.abits
    wbits = FLAGS.wbits
    abitsList = FLAGS.abitsList
    wbitsList = FLAGS.wbitsList
    stocRound = True if FLAGS.stocRound else False
    rand = FLAGS.rand
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree = True if FLAGS.ensembleThree else False
    abits2 = FLAGS.abits2
    wbits2 = FLAGS.wbits2
    abits2List = FLAGS.abits2List
    wbits2List = FLAGS.wbits2List
    ####
   
    save = False
    train_from_scratch = False

    #### Imagenet flags
    imagenet_path = FLAGS.imagenet_path
    if imagenet_path is None:
        print("Error: Imagenet data path not specified")
        sys.exit(1)

    # Imagenet specific dimensions
    img_rows = _DEFAULT_IMAGE_SIZE
    img_cols = _DEFAULT_IMAGE_SIZE
    channels = _NUM_CHANNELS
    nb_classes = _NUM_CLASSES

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)
    
    # Get imagenet datasets
    train_dataset, test_dataset = data_imagenet(nb_epochs, batch_size, imagenet_path)

    # Create initializable iterators
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    # Getting next elements from the iterators
    next_test_element = test_iterator.get_next()
    next_train_element = train_iterator.get_next()
    
    train_x, train_y = train_iterator.get_next()
    test_x, test_y = test_iterator.get_next()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name="phase")

    logits_scalar = tf.placeholder_with_default(
        INIT_T, shape=(), name="logits_temperature")
   
    if ensembleThree: 
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given
    
    if ensembleThree: 
       if (wbitsList is None) or (abitsList is None): # Layer wise separate quantization not specified for first model
           if (wbits==0) or (abits==0):
               print("Error: the number of bits for constant precision weights and activations across layers for the first model have to specified using wbits1 and abits1 flags")
               sys.exit(1)
           else:
               fixedPrec1 = 1
       elif (len(wbitsList) != 6) or (len(abitsList) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer of the first model")  
           sys.exit(1)
       else: 
           fixedPrec1 = 0
       
       if (wbits2List is None) or (abits2List is None): # Layer wise separate quantization not specified for second model
           if (wbits2==0) or (abits2==0):
               print("Error: the number of bits for constant precision weights and activations across layers for the second model have to specified using wbits1 and abits1 flags")
               sys.exit(1)
           else:
               fixedPrec2 = 1
       elif (len(wbits2List) != 6) or (len(abits2List) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer of the second model")  
           sys.exit(1)
       else: 
           fixedPrec2 = 0

       if (fixedPrec2 != 1) or (fixedPrec1 != 1): # Atleast one of the models have separate precisions per layer
           fixedPrec = 0
           print("At least one of the models has separate per-layer precisions")
           if (fixedPrec1 == 1): # first model has fixed precision
               abitsList = (abits, abits, abits, abits, abits, abits)
               wbitsList = (wbits, wbits, wbits, wbits, wbits, wbits)
           if (fixedPrec2 == 1): # second model has fixed precision
               abits2List = (abits2, abits2, abits2, abits2, abits2, abits2)
               wbits2List = (wbits2, wbits2, wbits2, wbits2, wbits2, wbits2)
       else:
           fixedPrec=1
       
       if (train_from_scratch):
           print ("The ensemble model cannot be trained from scratch")
           sys.exit(1)
       if fixedPrec == 1:
           from cleverhans_tutorials.tutorial_models import make_ensemble_three_alexnet
           model = make_ensemble_three_alexnet(
               phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbits, abits, wbits2, abits2, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
       else:
           from cleverhans_tutorials.tutorial_models import make_layerwise_three_combined_alexnet
           model = make_layerwise_three_combined_alexnet(
               phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbitsList, abitsList, wbits2List, abits2List, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
    elif lowprecision:
       if (wbitsList is None) or (abitsList is None): # Layer wise separate quantization not specified
           if (wbits==0) or (abits==0):
               print("Error: the number of bits for constant precision weights and activations across layers have to specified using wbits and abits flags")
               sys.exit(1)
           else:
               fixedPrec = 1
       elif (len(wbitsList) != 6) or (len(abitsList) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer")  
           sys.exit(1)
       else: 
           fixedPrec = 0
       
       if fixedPrec:
           
           ### For training from scratch
           from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_alexnet
           model = make_basic_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbits, abits, input_shape=(
            None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes)  
       else:
           from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_alexnet
           model = make_layerwise_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbitsList, abitsList, 
            input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
    else:
        ### For training from scratch
        from cleverhans_tutorials.tutorial_models import make_basic_alexnet_from_scratch
        model = make_basic_alexnet_from_scratch(phase, logits_scalar, 'fp_', input_shape=(
        None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:
    ##default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the ImageNet model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params)
        else: #default below
            acc = model_eval_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an Imagenet model
    train_params = {
        'lowprecision': lowprecision,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {'eps': MAX_EPS, 'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {'eps': tf.abs(tf.truncated_normal(
                shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar
   #  if adv:
   #      from cleverhans.attacks import FastGradientMethod
   #      fgsm = FastGradientMethod(model, back='tf', sess=sess)
   #      fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
   #      adv_x_train = fgsm.generate(x, phase, **fgsm_params)
   #      preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})
        
        # do clean training for 'nb_epochs' or 'delay' epochs with learning rate reducing with time
        model_train_imagenet2(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase,
                evaluate=evaluate, args=train_params, save=save, rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train_imagenet(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase,
                    predictions_adv=preds_adv_train, evaluate=evaluate, args=train_params, save=save, rng=rng)
    else:
        if ensembleThree: ## ensembleThree models have to be loaded from different paths
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            # First 11 variables from path1
            stored_variables = ['lp_conv1_init/k', 'lp_conv1_init/b', 'lp_conv2_init/k', 'lp_conv3_init/k', 'lp_conv4_init/k', 'lp_conv5_init/k', 'lp_ip1init/W', 'lp_ip1init/b', 'lp_ip2init/W', 'lp_logits_init/W', 'lp_logits_init/b']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[:11]))) # go through OrderedDict since a plain dict did not preserve the order
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            # Second 11 variables from path2
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[11:22])))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Third 11 variables from path3
            stored_variables = ['fp_conv1_init/k', 'fp_conv1_init/b', 'fp_conv2_init/k', 'fp_conv3_init/k', 'fp_conv4_init/k', 'fp_conv5_init/k', 'fp_ip1init/W', 'fp_ip1init/b', 'fp_ip2init/W', 'fp_logits_init/W', 'fp_logits_init/b']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[22:33])))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
            # Next 24 batch norm variables from path1
            stored_variables = ['lp__batchNorm1/batch_normalization/gamma', 'lp__batchNorm1/batch_normalization/beta', 'lp__batchNorm1/batch_normalization/moving_mean', 'lp__batchNorm1/batch_normalization/moving_variance', 'lp__batchNorm2/batch_normalization/gamma', 'lp__batchNorm2/batch_normalization/beta', 'lp__batchNorm2/batch_normalization/moving_mean', 'lp__batchNorm2/batch_normalization/moving_variance', 'lp__batchNorm3/batch_normalization/gamma', 'lp__batchNorm3/batch_normalization/beta', 'lp__batchNorm3/batch_normalization/moving_mean', 'lp__batchNorm3/batch_normalization/moving_variance', 'lp__batchNorm4/batch_normalization/gamma', 'lp__batchNorm4/batch_normalization/beta', 'lp__batchNorm4/batch_normalization/moving_mean', 'lp__batchNorm4/batch_normalization/moving_variance', 'lp__batchNorm5/batch_normalization/gamma', 'lp__batchNorm5/batch_normalization/beta', 'lp__batchNorm5/batch_normalization/moving_mean', 'lp__batchNorm5/batch_normalization/moving_variance', 'lp__batchNorm6/batch_normalization/gamma', 'lp__batchNorm6/batch_normalization/beta', 'lp__batchNorm6/batch_normalization/moving_mean', 'lp__batchNorm6/batch_normalization/moving_variance']

            variable_dict = dict(OrderedDict(zip(stored_variables, variables[33:57])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Next 24 batch norm variables from path2
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[57:81])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Final 24 batch norm variables from path3
            stored_variables = ['fp__batchNorm1/batch_normalization/gamma', 'fp__batchNorm1/batch_normalization/beta', 'fp__batchNorm1/batch_normalization/moving_mean', 'fp__batchNorm1/batch_normalization/moving_variance', 'fp__batchNorm2/batch_normalization/gamma', 'fp__batchNorm2/batch_normalization/beta', 'fp__batchNorm2/batch_normalization/moving_mean', 'fp__batchNorm2/batch_normalization/moving_variance', 'fp__batchNorm3/batch_normalization/gamma', 'fp__batchNorm3/batch_normalization/beta', 'fp__batchNorm3/batch_normalization/moving_mean', 'fp__batchNorm3/batch_normalization/moving_variance', 'fp__batchNorm4/batch_normalization/gamma', 'fp__batchNorm4/batch_normalization/beta', 'fp__batchNorm4/batch_normalization/moving_mean', 'fp__batchNorm4/batch_normalization/moving_variance', 'fp__batchNorm5/batch_normalization/gamma', 'fp__batchNorm5/batch_normalization/beta', 'fp__batchNorm5/batch_normalization/moving_mean', 'fp__batchNorm5/batch_normalization/moving_variance', 'fp__batchNorm6/batch_normalization/gamma', 'fp__batchNorm6/batch_normalization/beta', 'fp__batchNorm6/batch_normalization/moving_mean', 'fp__batchNorm6/batch_normalization/moving_variance']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[81:105])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path3))
        else: # restoring the model trained using this setup, not a downloaded one
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
            # evaluate()


    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params)
    else: #default below
        accuracy = model_eval_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    adv_inputs = test_x #adversarial inputs can be generated from any of the test examples 

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    nb_adv_per_sample = 1
    adv_ys = None
    yname = "y"

    print('Crafting adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': batch_size,
                         'initial_const': 10,
                         }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({'clip_min': -2.2, 'clip_max': 2.7})  # since the max and min for ImageNet turn out to be around -2.11 and 2.12
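    # A rough sanity check on those bounds (assumption: the inputs were
    # standardized with the usual per-channel ImageNet statistics,
    # mean ~ [0.485, 0.456, 0.406], std ~ [0.229, 0.224, 0.225]; the exact
    # figures depend on the preprocessing actually used):
    #   lowest normalized value  ~ (0.0 - 0.485) / 0.229 ~ -2.12
    #   highest normalized value ~ (1.0 - 0.406) / 0.225 ~  2.64
    # so clipping to [-2.2, 2.7] loosely brackets the valid input range.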
    eval_params = {'batch_size': batch_size}
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    print("Evaluating un-targeted results")
    if ensembleThree:
        adv_accuracy = model_eval_ensemble_adv_imagenet(sess, x, y, preds, test_iterator, 
                        test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params)
    else:
        adv_accuracy = model_eval_adv_imagenet(sess, x, y, preds, test_iterator, 
                        test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params)
    
    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))


    # Close TF session
    sess.close()
# Example 4
x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols))
y = tf.placeholder(tf.float32, shape=(None, nb_classes))

# Set up adversarial example generation using cleverhans
# wrap_model = KerasModelWrapper(model)
pgd_params = {
    'eps': config['eps'],
    'eps_iter': config['eps_iter'],
    'nb_iter': config['nb_iter'],
    'clip_min': 0.,
    'clip_max': 1.,
    'rand_init': True
}
# pgd = MadryEtAl(wrap_model, sess=sess)
pgd = MadryEtAl(model, sess=sess)
x_adv = pgd.generate(x, **pgd_params)
y, x_adv = tf.stop_gradient(y), tf.stop_gradient(x_adv)
# logits_adv = wrap_model.get_logits(x_adv)
logits_adv = model.get_logits(x_adv)
loss_adv = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y,
                                                      logits=logits_adv)
loss_adv = tf.reduce_sum(loss_adv)
y_adv = tf.argmax(logits_adv, axis=1)
acc_adv = tf.reduce_sum(
    tf.cast(tf.equal(y_adv, tf.argmax(y, axis=1)), tf.int32))

# Get loss on clean samples
logits_clean = model.get_logits(x)
loss_clean = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y,
                                                        logits=logits_clean)
loss_clean = tf.reduce_sum(loss_clean)
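# Minimal continuation sketch (not part of the original snippet): a common
# adversarial-training objective simply mixes the clean and adversarial
# cross-entropy terms and minimizes the sum. The 'adv_weight' and 'lr' keys
# below are assumptions, not keys known to exist in this config:
#
#   loss_total = loss_clean + config.get('adv_weight', 1.0) * loss_adv
#   train_op = tf.train.AdamOptimizer(config.get('lr', 1e-3)).minimize(loss_total)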
def SSGD_resnet_testing(TIN_data, resnet_params, train_params, test_params,
                        all_params):
    # dict for encoding layer variables and output layer variables
    pre_define_vars = {}

    # list of variables to train
    train_vars = []

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, trainable=False)

        # Parameter declaration
        ######################################

        # encoding (pretrain) layer variables
        with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
            kernel1 = tf.get_variable(
                'kernel1',
                shape=[
                    train_params.enc_kernel_size, train_params.enc_kernel_size,
                    3, train_params.enc_filters
                ],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            biases1 = tf.get_variable('biases1',
                                      shape=[train_params.enc_filters],
                                      dtype=tf.float32,
                                      initializer=tf.constant_initializer(0.0))
        pre_define_vars['kernel1'] = kernel1
        pre_define_vars['biases1'] = biases1
        train_vars.append(kernel1)
        train_vars.append(biases1)

        dp_mult = all_params['dp_mult']

        # output layer variables
        with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope:
            stdv = 1.0 / math.sqrt(train_params.hk)
            final_w = tf.get_variable(
                'kernel',
                shape=[train_params.hk, train_params.num_classes],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-stdv, stdv))
            final_b = tf.get_variable('bias',
                                      shape=[train_params.num_classes],
                                      dtype=tf.float32,
                                      initializer=tf.constant_initializer(0.0))
        pre_define_vars['final_w'] = final_w
        pre_define_vars['final_b'] = final_b
        train_vars.append(final_w)
        train_vars.append(final_b)
        ######################################

        # Build a Graph that computes the logits predictions from the inputs
        ######################################
        # input placeholders
        x_sb = tf.placeholder(
            tf.float32,
            [None, train_params.image_size, train_params.image_size, 3],
            name='x_sb')  # input is the bunch of n_batchs
        x_test = tf.placeholder(
            tf.float32,
            [None, train_params.image_size, train_params.image_size, 3],
            name='x_test')

        y_sb = tf.placeholder(
            tf.float32, [None, train_params.num_classes],
            name='y_sb')  # input is the bunch of n_batchs (super batch)
        y_test = tf.placeholder(tf.float32, [None, train_params.num_classes],
                                name='y_test')

        noise = tf.placeholder(tf.float32, [
            None, train_params.enc_h_size, train_params.enc_h_size,
            train_params.enc_filters
        ],
                               name='noise')  # one time

        keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

        with tf.device('/gpu:0'):
            # the model for testing
            y_logits_test, _ = test_inference(
                x_sb, train_params.attack_norm_bound * noise, keep_prob,
                pre_define_vars, resnet_params, train_params)
            y_softmax_test = tf.nn.softmax(y_logits_test)
        correct_prediction = tf.equal(tf.argmax(y_logits_test, 1),
                                      tf.argmax(y_sb, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # print all variables
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        all_vars = tf.global_variables()
        print_var_list('all vars', all_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        # add selected vars into list
        # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or
        for var in tf.global_variables():
            if 'resnet_model' in var.name and \
              ('conv0' in var.name or
              'fc' in var.name or
              'res3' in var.name or
              'res4' in var.name or
              'res1' in var.name or
              'res2' in var.name) and \
                ('gamma' in var.name or
                  'beta' in var.name or
                  'kernel' in var.name or
                  'bias' in var.name):
                if var not in train_vars:
                    train_vars.append(var)
            elif 'enc_layer' in var.name and \
              ('kernel' in var.name or
                'bias' in var.name or
                'gamma' in var.name or
                'beta' in var.name):
                if var not in train_vars:
                    train_vars.append(var)

        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        print_var_list('train_vars', train_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        ######################################

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # list all checkpoints in ckpt_path
        checkpoint_path_read = os.path.join(os.getcwd() +
                                            test_params.check_point_dir)
        ckpts = tf.train.get_checkpoint_state(checkpoint_path_read)
        print(ckpts)
        # find the ckpt we need to load and load it
        for ckpt in ckpts.all_model_checkpoint_paths:
            # print(ckpt)
            ckpt_step = int(ckpt.split('-')[-1])
            if ckpt_step == test_params.step_to_load:
                saver.restore(sess, ckpt)
                print('model loaded from {}'.format(ckpt))

        # #######################################

        # # setup all attacks
        attack_switch = {
            'fgsm': False,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_output_probs,
            output_layer='probs',
            keep_prob=keep_prob,
            pre_define_vars=pre_define_vars,
            resnet_params=resnet_params,
            train_params=train_params)
        attack_tensor_testing_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        with tf.device('/gpu:0'):
            if attack_switch['ifgsm']:
                print('creating attack tensor of BasicIterativeMethod')
                ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                 sess=sess)
                attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    clip_min=-1.0,
                    clip_max=1.0)

        # MomentumIterativeMethod
        with tf.device('/gpu:0'):
            if attack_switch['mim']:
                print('creating attack tensor of MomentumIterativeMethod')
                mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                  sess=sess)
                attack_tensor_testing_dict['mim'] = mim_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    decay_factor=1.0,
                    clip_min=-1.0,
                    clip_max=1.0)

        # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
        with tf.device('/gpu:0'):
            if attack_switch['madry']:
                print('creating attack tensor of MadryEtAl')
                madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                attack_tensor_testing_dict['madry'] = madry_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    clip_min=-1.0,
                    clip_max=1.0)
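        # All three attacks are built once as symbolic tensors keyed by name;
        # the eps placeholder mu_alpha is fed at run time, so the same graph
        # serves every value in test_params.fgsm_eps_list.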

        # #######################################

        sigmaEGM = all_params['sigmaEGM']

        __noiseE = all_params['__noiseE']

        grad_redis = all_params['grad_redis']

        _sensitivity_2 = all_params['_sensitivity_2']

        _sensitivityW = all_params['_sensitivityW']

        Delta_redis = all_params['Delta_redis']

        sigmaHGM = all_params['sigmaHGM']

        __noiseH = all_params['__noiseH']

        __noise_zero = all_params['__noise_zero']

        ####################################

        ####################################
        print('start testing')
        start_time = time.time()
        log_file_path = os.getcwd() + test_params.log_file_path
        log_file = open(log_file_path, 'a', encoding='utf-8')
        attacks_and_benign = test_params.attacks + ['benign']
        #===================adv samples=====================
        # for each eps setting
        for fgsm_eps in test_params.fgsm_eps_list:
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            log_str = ''
            eps_start_time = time.time()
            # cover all test data
            for i in range(test_params.test_epochs):
                test_batch = TIN_data.test.next_batch(
                    test_params.test_batch_size)
                adv_images_dict = {}
                # test for each attack
                for atk in attacks_and_benign:
                    start_time = time.time()
                    if atk not in adv_acc_dict:
                        adv_acc_dict[atk] = 0.0
                        robust_adv_acc_dict[atk] = 0.0
                        robust_adv_utility_dict[atk] = 0.0
                    if atk == 'benign':
                        testing_img = test_batch[0]
                    elif attack_switch[atk]:
                        # if only one gpu available, generate adv samples in-place
                        if atk not in adv_images_dict:
                            adv_images_dict[atk] = sess.run(
                                attack_tensor_testing_dict[atk],
                                feed_dict={
                                    x_sb: test_batch[0],
                                    mu_alpha: [fgsm_eps],
                                    keep_prob: 1.0
                                })
                        testing_img = adv_images_dict[atk]
                    else:
                        continue
                    print('adv gen time: {}s'.format(time.time() - start_time))
                    start_time = time.time()

                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([
                        test_params.test_batch_size, train_params.num_classes
                    ])
                    softmax_predictions = sess.run(
                        y_softmax_test,
                        feed_dict={
                            x_sb: testing_img,
                            noise: (__noiseE + __noiseH) / 2,
                            keep_prob: 1.0
                        })
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(1, test_params.num_samples + 1):
                        if n_draws % 100 == 0:
                            print(
                                'current draws: {}, avg draw time: {}s'.format(
                                    n_draws,
                                    (time.time() - start_time) / n_draws))
                        _noiseE = np.random.normal(
                            0.0, sigmaEGM**2,
                            train_params.enc_h_size * train_params.enc_h_size *
                            train_params.enc_filters).astype(np.float32)
                        _noiseE = np.reshape(_noiseE, [
                            -1, train_params.enc_h_size,
                            train_params.enc_h_size, train_params.enc_filters
                        ])
                        _noise = np.random.normal(
                            0.0, sigmaHGM**2,
                            train_params.enc_h_size * train_params.enc_h_size *
                            train_params.enc_filters).astype(np.float32)
                        _noise = np.reshape(_noise, [
                            -1, train_params.enc_h_size,
                            train_params.enc_h_size, train_params.enc_filters
                        ]) * grad_redis
                        for j in range(test_params.test_batch_size):
                            pred = argmax_predictions[j]
                            predictions_form_argmax[j, pred] += 1
                        softmax_predictions = sess.run(
                            y_softmax_test,
                            feed_dict={
                                x_sb:
                                testing_img,
                                noise: (__noiseE + __noiseH) / 2 +
                                (_noiseE + _noise) / 4,
                                keep_prob:
                                1.0
                            })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
                    final_predictions = predictions_form_argmax
                    is_correct = []
                    is_robust = []
                    for j in range(test_params.test_batch_size):
                        is_correct.append(
                            np.argmax(test_batch[1][j]) == np.argmax(
                                final_predictions[j]))
                        robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                            counts=predictions_form_argmax[j],
                            eta=0.05,
                            dp_attack_size=fgsm_eps,
                            dp_epsilon=train_params.dp_epsilon,
                            dp_delta=0.05,
                            dp_mechanism='gaussian') / dp_mult
                        is_robust.append(robustness_from_argmax >= fgsm_eps)
                    adv_acc_dict[atk] += np.sum(
                        is_correct) * 1.0 / test_params.test_batch_size
                    robust_adv_acc_dict[atk] += np.sum([
                        a and b for a, b in zip(is_robust, is_correct)
                    ]) * 1.0 / np.sum(is_robust)
                    robust_adv_utility_dict[atk] += np.sum(
                        is_robust) * 1.0 / test_params.test_batch_size
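                    # Reading of the three accumulators above:
                    #  - adv_acc_dict[atk]: fraction of the batch whose
                    #    majority vote over the noisy draws matches the label.
                    #  - robust_adv_acc_dict[atk]: accuracy restricted to the
                    #    samples whose certified radius (robustness_from_argmax)
                    #    is at least fgsm_eps; note the division assumes
                    #    np.sum(is_robust) > 0, otherwise it yields nan.
                    #  - robust_adv_utility_dict[atk]: fraction of the batch
                    #    that is certified at radius fgsm_eps.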

                    dt = time.time() - start_time
                    print('atk test time: {}s'.format(dt), flush=True)
            ##############################
            # average all acc for whole test data
            log_str += datetime.now().strftime("%Y-%m-%d_%H:%M:%S\n")
            log_str += 'model trained epoch: {}\n'.format(
                test_params.epoch_to_test)
            log_str += 'fgsm_eps: {}\n'.format(fgsm_eps)
            log_str += 'iter_step_testing: {}\n'.format(
                test_params.iter_step_testing)
            log_str += 'num_samples: {}\n'.format(test_params.num_samples)
            for atk in attacks_and_benign:
                adv_acc_dict[atk] = adv_acc_dict[atk] / test_params.test_epochs
                robust_adv_acc_dict[
                    atk] = robust_adv_acc_dict[atk] / test_params.test_epochs
                robust_adv_utility_dict[atk] = robust_adv_utility_dict[
                    atk] / test_params.test_epochs
                # added robust prediction
                log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
            dt = time.time() - eps_start_time
            print('total test time: {}s'.format(dt), flush=True)
            print(log_str, flush=True)
            print('*******************')

            log_file.write(log_str)
            log_file.write('*******************\n')
            log_file.flush()

            dt = time.time() - start_time
        log_file.close()
# Example 6
def train(cifar10_data, logfile):
    """Train CIFAR-10 for a number of steps."""
    logfile.write("fgsm_eps \t %g, epsilon \t %d \n" %
                  (fgsm_eps, target_eps[0]))
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Parameter declaration
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1',
            shape=[3, 3, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases1 = cifar10._variable_on_cpu('biases1', [128],
                                           tf.constant_initializer(0.0))
        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2',
            shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases2 = cifar10._variable_on_cpu('biases2', [128],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3',
            shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases3 = cifar10._variable_on_cpu('biases3', [256],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local4') as scope:
        kernel4 = cifar10._variable_with_weight_decay(
            'kernel4',
            shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04,
            wd=0.004)
        biases4 = cifar10._variable_on_cpu('biases4', [hk],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local5') as scope:
        kernel5 = cifar10._variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 /
                           (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases5 = cifar10._variable_on_cpu('biases5', [10],
                                           tf.constant_initializer(0.1))

        scale2 = tf.Variable(tf.ones([hk]))
        beta2 = tf.Variable(tf.zeros([hk]))

        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5, scale2, beta2
        ]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivityW = tf.reduce_max(sing_vals)
        dp_delta = 0.05
        #dp_mult = attack_norm_bound * math.sqrt(2 * math.log(1.25 / dp_delta)) / dp_epsilon
        noise = tf.placeholder(tf.float32, [None, 28, 28, 32])
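        # NOTE: this placeholder is unused; it is shadowed by the
        # [None, 14, 14, 128] 'noise' placeholder defined just below.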

        dp_mult = attack_norm_bound * math.sqrt(
            2 * math.log(1.25 / dp_delta)) / dp_epsilon
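        # This matches the classical (eps, delta)-DP Gaussian mechanism
        # calibration sigma = Delta * sqrt(2 * ln(1.25 / delta)) / eps,
        # with the sensitivity Delta taken here as attack_norm_bound.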
        noise = tf.placeholder(tf.float32, [None, 14, 14, 128])
        sigma = tf.placeholder(tf.float32)
        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        #y_conv, h_conv1 = inference(x, params, dp_mult**2 * noise);
        y_conv, h_conv1 = inference(x, params, attack_norm_bound * noise)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        #logits = inference(images)

        # Calculate loss. Apply Taylor Expansion for the output layer
        loss = cifar10.lossDPSGD(y_conv, y_)

        # noise redistribution #
        grad, = tf.gradients(loss, h_conv1)
        normalized_grad = tf.sign(grad)
        normalized_grad = tf.stop_gradient(normalized_grad)
        normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad,
                                                  axis=(0)))**2
        sum_r = tf.reduce_sum(normalized_grad_r,
                              axis=(0, 1, 2),
                              keepdims=False)
        normalized_grad_r = 14 * 14 * 128 * normalized_grad_r / sum_r
        print(normalized_grad_r)

        shape_grad = normalized_grad_r.get_shape().as_list()
        grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]])
        g = tf.transpose(grad_t)
        sing_g_vals = tf.svd(g, compute_uv=False)
        sensitivity_2 = tf.reduce_max(sing_g_vals)
        ########################
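        # Noise redistribution, as computed above: the sign of d(loss)/d(h_conv1)
        # is averaged over the batch and squared, then rescaled so its entries
        # sum to 14*14*128 (the feature-map size). Its largest singular value
        # (sensitivity_2) and the largest singular value of kernel1
        # (sensitivityW) are evaluated after the warm-up run below and combined
        # into Delta_redis = sensitivityW / sqrt(sensitivity_2), which scales
        # the heterogeneous noise sigmaHGM.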

        opt = tf.train.GradientDescentOptimizer(lr)

        gw_K1 = tf.gradients(loss, kernel1)[0]
        gb1 = tf.gradients(loss, biases1)[0]

        gw_K2 = tf.gradients(loss, kernel2)[0]
        gb2 = tf.gradients(loss, biases2)[0]

        gw_K3 = tf.gradients(loss, kernel3)[0]
        gb3 = tf.gradients(loss, biases3)[0]

        gw_K4 = tf.gradients(loss, kernel4)[0]
        gb4 = tf.gradients(loss, biases4)[0]

        gw_K5 = tf.gradients(loss, kernel5)[0]
        gb5 = tf.gradients(loss, biases5)[0]

        #clip gradient
        gw_K1 = tf.clip_by_norm(gw_K1, clip_bound)
        gw_K2 = tf.clip_by_norm(gw_K2, clip_bound)
        gw_K3 = tf.clip_by_norm(gw_K3, clip_bound)
        gw_K4 = tf.clip_by_norm(gw_K4, clip_bound)
        gw_K5 = tf.clip_by_norm(gw_K5, clip_bound)

        #perturb
        gw_K1 += tf.random_normal(shape=tf.shape(gw_K1),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K2 += tf.random_normal(shape=tf.shape(gw_K2),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K3 += tf.random_normal(shape=tf.shape(gw_K3),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K4 += tf.random_normal(shape=tf.shape(gw_K4),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K5 += tf.random_normal(shape=tf.shape(gw_K5),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gb1 += tf.random_normal(shape=tf.shape(gb1),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb2 += tf.random_normal(shape=tf.shape(gb2),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb3 += tf.random_normal(shape=tf.shape(gb3),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb4 += tf.random_normal(shape=tf.shape(gb4),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb5 += tf.random_normal(shape=tf.shape(gb5),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
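        # Standard DP-SGD pattern: every per-parameter gradient is clipped to
        # clip_bound in L2 norm and perturbed with Gaussian noise of standard
        # deviation sigma * sensitivity, scaled by 1 / batch_size. The moments
        # accountant created further below (privacy_accum_op) then tracks the
        # cumulative (eps, delta) spent per training step, and training stops
        # once the target delta is exceeded.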

        # apply gradients and keep tracking moving average of the parameters
        apply_gradient_op = opt.apply_gradients([(gw_K1, kernel1),
                                                 (gb1, biases1),
                                                 (gw_K2, kernel2),
                                                 (gb2, biases2),
                                                 (gw_K3, kernel3),
                                                 (gb3, biases3),
                                                 (gw_K4, kernel4),
                                                 (gb4, biases4),
                                                 (gw_K5, kernel5),
                                                 (gb5, biases5)],
                                                global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        with tf.control_dependencies(
            [apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #train_op = cifar10.trainDPSGD(loss, global_step, clip_bound, sigma, sensitivity)

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size)

        # define each attack method's tensor
        attack_tensor_dict = {}
        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                                eps=fgsm_eps,
                                                clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                                  eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0,
                                              clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x,
                                                  eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry
        #====================== attack =========================

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Privacy accountant
        priv_accountant = accountant.GaussianMomentsAccountant(D)
        privacy_accum_op = priv_accountant.accumulate_privacy_spending(
            [None, None], sigma, batch_size)

        # Build the summary operation based on the TF collection of Summaries.
        #summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + path, sess.graph)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / batch_size)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D / batch_size))
        #number of steps for one epoch

        s = math.log(sqrt(2.0 / math.pi) * 1e+5)
        sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / (
            2.0 * dp_epsilon)
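        # Calibration of the encoding-layer noise: with
        #   s = ln(sqrt(2 / pi) * 1e5) ~ 11.29,
        # sigmaEGM = sqrt(2) * (sqrt(s) + sqrt(s + dp_epsilon)) / (2 * dp_epsilon),
        # i.e. a Gaussian-mechanism style standard deviation parameterised by
        # dp_epsilon; sigmaHGM below reuses the same form scaled by Delta_redis.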
        #print(sigmaEGM)
        __noiseE = np.random.normal(0.0, sigmaEGM,
                                    14 * 14 * 128).astype(np.float32)
        __noiseE = np.reshape(__noiseE, [-1, 14, 14, 128])
        print("Compute The Noise Redistribution Vector")
        for step in xrange(_global_step, 100 * step_for_epoch):
            batch = cifar10_data.train.next_batch(batch_size)
            #Get a random batch.
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    noise: __noiseE * 0,
                    sigma: sigma_value * 0
                })
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
        batch = cifar10_data.train.next_batch(40 * batch_size)
        grad_redis = sess.run([normalized_grad_r],
                              feed_dict={
                                  x: batch[0],
                                  y_: batch[1],
                                  noise: __noiseE * 0
                              })
        _sensitivity_2 = sess.run([sensitivity_2],
                                  feed_dict={
                                      x: batch[0],
                                      y_: batch[1],
                                      noise: __noiseE * 0
                                  })
        #print(_sensitivity_2)

        _sensitivityW = sess.run(sensitivityW)
        #print(_sensitivityW)
        Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0])
        #print(Delta_redis)
        sigmaHGM = sqrt(2.0) * Delta_redis * (
            sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
        #print(sigmaHGM)
        __noiseH = np.random.normal(0.0, sigmaHGM,
                                    14 * 14 * 128).astype(np.float32)
        __noiseH = np.reshape(__noiseH, [-1, 14, 14, 128]) * grad_redis

        sess.run(init)
        print("Training")
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            batch = cifar10_data.train.next_batch(batch_size)
            #Get a random batch.
            #grad_redis = sess.run([normalized_grad_r], feed_dict = {x: batch[0], y_: batch[1], noise: (__noise + grad_redis)/2})
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    noise: (__noiseE + __noiseH) / 2,
                    sigma: sigma_value
                })
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            sess.run([privacy_accum_op])
            spent_eps_deltas = priv_accountant.get_privacy_spent(
                sess, target_eps=target_eps)
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
                print(spent_eps_deltas)
            _break = False
            for _eps, _delta in spent_eps_deltas:
                if _delta >= delta:
                    _break = True
                    break
            if _break == True:
                break

        ## Robustness
        print("Testing")
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        test_bach_size = 5000
        for atk in attack_switch.keys():
            if atk not in adv_acc_dict:
                adv_acc_dict[atk] = -1
                robust_adv_acc_dict[atk] = -1
                robust_adv_utility_dict[atk] = -1
            if attack_switch[atk]:
                test_bach = cifar10_data.test.next_batch(test_bach_size)
                adv_images_dict = sess.run(attack_tensor_dict[atk],
                                           feed_dict={x: test_bach[0]})
                ### PixelDP Robustness ###
                predictions_form_argmax = np.zeros([test_bach_size, 10])
                softmax_predictions = sess.run(softmax_y_conv,
                                               feed_dict={
                                                   x: adv_images_dict,
                                                   noise:
                                                   (__noiseE + __noiseH) / 2
                                               })
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
                for n_draws in range(0, 1000):
                    _noiseE = np.random.normal(0.0, sigmaEGM, 14 * 14 *
                                               128).astype(np.float32)
                    _noiseE = np.reshape(_noiseE, [-1, 14, 14, 128])
                    _noise = np.random.normal(0.0, sigmaHGM,
                                              14 * 14 * 128).astype(np.float32)
                    _noise = np.reshape(_noise, [-1, 14, 14, 128]) * grad_redis
                    for j in range(test_bach_size):
                        pred = argmax_predictions[j]
                        predictions_form_argmax[j, pred] += 1
                    softmax_predictions = sess.run(
                        softmax_y_conv,
                        feed_dict={
                            x:
                            adv_images_dict,
                            noise:
                            (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4
                        })
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                final_predictions = predictions_form_argmax
                is_correct = []
                is_robust = []
                for j in range(test_bach_size):
                    is_correct.append(
                        np.argmax(test_bach[1][j]) == np.argmax(
                            final_predictions[j]))
                    robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                        counts=predictions_form_argmax[j],
                        eta=0.05,
                        dp_attack_size=fgsm_eps,
                        dp_epsilon=dp_epsilon,
                        dp_delta=0.05,
                        dp_mechanism='gaussian') / dp_mult
                    is_robust.append(robustness_from_argmax >= fgsm_eps)
                adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_bach_size
                robust_adv_acc_dict[atk] = np.sum([
                    a and b for a, b in zip(is_robust, is_correct)
                ]) * 1.0 / np.sum(is_robust)
                robust_adv_utility_dict[atk] = np.sum(
                    is_robust) * 1.0 / test_bach_size
                ##############################
        log_str = ""
        for atk in attack_switch.keys():
            if attack_switch[atk]:
                # added robust prediction
                log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
        print(log_str)
        logfile.write(log_str + '\n')
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        eps_benign = 1 / (1 + eps2_ratio) * (epsilon2)
        eps_adv = eps2_ratio / (1 + eps2_ratio) * (epsilon2)

        # Parameter declaration
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1',
            shape=[4, 4, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[AECODER_VARIABLES])
        biases1 = _bias_on_cpu('biases1', [128],
                               tf.constant_initializer(0.0),
                               collect=[AECODER_VARIABLES])

        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)  # 2*3*(14*14 + 2)*16/(L*sensitivity)

        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2',
            shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases2 = _bias_on_cpu('biases2', [128],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3',
            shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases3 = _bias_on_cpu('biases3', [256],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('local4') as scope:
        kernel4 = _variable_with_weight_decay(
            'kernel4',
            shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04,
            wd=0.004,
            collect=[CONV_VARIABLES])
        biases4 = _bias_on_cpu('biases4', [hk],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('local5') as scope:
        kernel5 = _variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 /
                           (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases5 = _bias_on_cpu('biases5', [10],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #scale2 = tf.Variable(tf.ones([hk]))
        #beta2 = tf.Variable(tf.zeros([hk]))

        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])
        noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_noise = tf.placeholder(tf.float32,
                                   [None, image_size, image_size, 3])

        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        # Auto-Encoder #
        Enc_Layer2 = EncLayer(inpt=adv_x,
                              n_filter_in=3,
                              n_filter_out=128,
                              filter_size=3,
                              W=kernel1,
                              b=biases1,
                              activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0],
                                                 Delta=Delta2,
                                                 epsilon=epsilon2,
                                                 batch_size=L,
                                                 learning_rate=learning_rate,
                                                 W=kernel1,
                                                 b=biases1,
                                                 perturbFMx=adv_noise,
                                                 perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x,
                              n_filter_in=3,
                              n_filter_out=128,
                              filter_size=3,
                              W=kernel1,
                              b=biases1,
                              activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(
            xShape=tf.shape(x)[0],
            Delta=Delta2,
            epsilon=epsilon2,
            batch_size=L,
            learning_rate=learning_rate,
            W=kernel1,
            b=biases1,
            perturbFMx=noise,
            perturbFM_h=FM_h)
        cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0)
        ###

        x_image = x + noise
        y_conv = inference(x_image, FM_h, params)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        adv_x += adv_noise
        y_adv_conv = inference(adv_x, FM_h, params)
        adv_y_ = tf.placeholder(tf.float32, [None, 10])

        # Calculate loss. Apply Taylor Expansion for the output layer
        perturbW = perturbFM * params[8]
        loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha,
                                 perturbW)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]);
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        train_var_list = tf.get_collection(CONV_VARIABLES)
        #print(pretrain_var_list)
        #print(train_var_list)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize(
                pretrain_adv + pretrain_benign,
                global_step=global_step,
                var_list=pretrain_var_list)
            train_op = cifar10.train(loss,
                                     global_step,
                                     learning_rate,
                                     _var_list=train_var_list)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        sess.run(kernel1.initializer)
        dp_epsilon = 1.0
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        delta_r = fgsm_eps * (image_size**2)
        _sensitivityW = sess.run(sensitivity)
        delta_h = _sensitivityW * (14**2)
        #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
        #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
        dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + (
            2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon)
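        # dp_mult converts a certified-robustness budget into pixel units:
        # each term is the noise scale Delta2 / (L * epsilon2_update)
        # (doubled for the hidden layer) divided by the per-layer attack budget
        # over dp_epsilon, with delta_r = fgsm_eps * image_size**2 for the input
        # and delta_h = sensitivity * 14**2 for the 14x14 feature map.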

        dynamic_eps = tf.placeholder(tf.float32)
        """y_test = inference(x, FM_h, params)
    softmax_y = tf.nn.softmax(y_test);
    c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0)
    x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])"""

        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size,
            adv_noise=adv_noise)

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])
        attack_tensor_dict = {}
        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                                eps=mu_alpha,
                                                clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                                  eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x,
                                              eps=mu_alpha,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0,
                                              clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x,
                                                  eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry

        #====================== attack =========================

        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        sess.run(init)

        # Start the queue runners.
        #tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint,
                                               sess.graph)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D / L))
        #number of steps for one epoch

        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])

        #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32)
        #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3])

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])

        #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32)
        #_W_adv = np.reshape(_W_adv, [32, 32, 3])
        #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128)
        #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]);

        test_size = len(cifar10_data.test.images)
        #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt')
        #BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L) #generateNoise(image_size, Delta2, eps_benign, L, beta);
        #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L)
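        # Laplace-mechanism noise tensors: Noise is used during training, while
        # Noise_test (generated with sensitivity 0) is used at evaluation time.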
        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        #generateNoise(image_size, Delta2, eps_adv, L, beta);
        Noise_test = generateIdLMNoise(
            image_size, 0, epsilon2_update,
            L)  #generateNoise(image_size, 0, 2*epsilon2, test_size, beta);

        emsemble_L = int(L / 3)
        preT_epochs = 100
        pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1)
        """logfile.write("pretrain: \n")
    for step in range(_global_step, _global_step + pre_T):
        d_eps = random.random()*0.5;
        batch = cifar10_data.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
        for iter in range(0, 2):
            adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})
        #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h});
        batch = cifar10_data.train.next_batch(L);
        sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h});
        if step % int(25*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128)
            logfile.write("step \t %d \t %g \n"%(step, cost_value))
            print(cost_value)
    print('pre_train finished')"""

        _global_step = 0
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            d_eps = random.random() * 0.5
            batch = cifar10_data.train.next_batch(emsemble_L)
            #Get a random batch.
            y_adv_batch = batch[1]
            """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
      for iter in range(0, 2):
          adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})"""
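            # Assemble the adversarial training batch from three attacks (I-FGSM,
            # MIM, Madry), each crafted on its own sub-batch of size emsemble_L;
            # y_adv_batch accumulates the corresponding labels.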
            adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'],
                                        feed_dict={
                                            x: batch[0],
                                            adv_noise: Noise,
                                            mu_alpha: [d_eps]
                                        })
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_mim = sess.run(attack_tensor_dict['mim'],
                                      feed_dict={
                                          x: batch[0],
                                          adv_noise: Noise,
                                          mu_alpha: [d_eps]
                                      })
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_madry = sess.run(attack_tensor_dict['madry'],
                                        feed_dict={
                                            x: batch[0],
                                            adv_noise: Noise,
                                            mu_alpha: [d_eps]
                                        })
            adv_images = np.append(np.append(adv_images_ifgsm,
                                             adv_images_mim,
                                             axis=0),
                                   adv_images_madry,
                                   axis=0)

            batch = cifar10_data.train.next_batch(L)
            #Get a random batch.

            sess.run(pretrain_step,
                     feed_dict={
                         x: batch[0],
                         adv_x: adv_images,
                         adv_noise: Noise_test,
                         noise: Noise,
                         FM_h: perturbFM_h
                     })
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    adv_x: adv_images,
                    adv_y_: y_adv_batch,
                    noise: Noise,
                    adv_noise: Noise_test,
                    FM_h: perturbFM_h
                })
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # report the result periodically
            if step % (50 * step_for_epoch) == 0 and step >= (300 *
                                                              step_for_epoch):
                '''predictions_form_argmax = np.zeros([test_size, 10])
          softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})
          argmax_predictions = np.argmax(softmax_predictions, axis=1)
          """for n_draws in range(0, 2000):
            _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L)
            _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128)
            _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);"""
          for j in range(test_size):
            pred = argmax_predictions[j]
            predictions_form_argmax[j, pred] += 2000;
          """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)"""
          final_predictions = predictions_form_argmax;
          is_correct = []
          is_robust = []
          for j in range(test_size):
              is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j]))
              robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult
              is_robust.append(robustness_from_argmax >= fgsm_eps)
          acc = np.sum(is_correct)*1.0/test_size
          robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
          robust_utility = np.sum(is_robust)*1.0/test_size
          log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)'''

                #===================adv samples=====================
                log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(
                    step, total_eps)
                """adv_images_dict = {}
          for atk in attack_switch.keys():
              if attack_switch[atk]:
                  adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict ={x:cifar10_data.test.images})
          print("Done with the generating of Adversarial samples")"""
                #===================adv samples=====================
                adv_acc_dict = {}
                robust_adv_acc_dict = {}
                robust_adv_utility_dict = {}
                test_bach_size = 5000
                for atk in attack_switch.keys():
                    print(atk)
                    if atk not in adv_acc_dict:
                        adv_acc_dict[atk] = -1
                        robust_adv_acc_dict[atk] = -1
                        robust_adv_utility_dict[atk] = -1
                    if attack_switch[atk]:
                        test_bach = cifar10_data.test.next_batch(
                            test_bach_size)
                        adv_images_dict = sess.run(attack_tensor_dict[atk],
                                                   feed_dict={
                                                       x: test_bach[0],
                                                       adv_noise: Noise_test,
                                                       mu_alpha: [fgsm_eps]
                                                   })
                        print("Done adversarial examples")
                        ### PixelDP Robustness ###
                        predictions_form_argmax = np.zeros(
                            [test_bach_size, 10])
                        softmax_predictions = sess.run(softmax_y_conv,
                                                       feed_dict={
                                                           x: adv_images_dict,
                                                           noise: Noise,
                                                           FM_h: perturbFM_h
                                                       })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
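                        # Monte-Carlo certification (PixelDP-style): repeat noisy
                        # forward passes, accumulate per-class vote counts, and
                        # later turn those counts into a certified radius.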
                        for n_draws in range(0, 1000):
                            _BenignLNoise = generateIdLMNoise(
                                image_size, Delta2, epsilon2_update, L)
                            _perturbFM_h = np.random.laplace(
                                0.0, 2 * Delta2 / (epsilon2_update * L),
                                14 * 14 * 128)
                            _perturbFM_h = np.reshape(_perturbFM_h,
                                                      [-1, 14, 14, 128])
                            if n_draws == 500:
                                print("n_draws = 500")
                            for j in range(test_bach_size):
                                pred = argmax_predictions[j]
                                predictions_form_argmax[j, pred] += 1
                            softmax_predictions = sess.run(
                                softmax_y_conv,
                                feed_dict={
                                    x: adv_images_dict,
                                    noise: (_BenignLNoise / 10 + Noise),
                                    FM_h: perturbFM_h
                                }) * sess.run(
                                    softmax_y_conv,
                                    feed_dict={
                                        x: adv_images_dict,
                                        noise: Noise,
                                        FM_h: (_perturbFM_h / 10 + perturbFM_h)
                                    })
                            #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)})
                            argmax_predictions = np.argmax(softmax_predictions,
                                                           axis=1)
                        final_predictions = predictions_form_argmax
                        is_correct = []
                        is_robust = []
                        for j in range(test_bach_size):
                            is_correct.append(
                                np.argmax(test_bach[1][j]) == np.argmax(
                                    final_predictions[j]))
                            robustness_from_argmax = robustness.robustness_size_argmax(
                                counts=predictions_form_argmax[j],
                                eta=0.05,
                                dp_attack_size=fgsm_eps,
                                dp_epsilon=dp_epsilon,
                                dp_delta=0.05,
                                dp_mechanism='laplace') / dp_mult
                            is_robust.append(
                                robustness_from_argmax >= fgsm_eps)
                        adv_acc_dict[atk] = np.sum(
                            is_correct) * 1.0 / test_bach_size
                        robust_adv_acc_dict[atk] = np.sum([
                            a and b for a, b in zip(is_robust, is_correct)
                        ]) * 1.0 / np.sum(is_robust)
                        robust_adv_utility_dict[atk] = np.sum(
                            is_robust) * 1.0 / test_bach_size
                        ##############################
                for atk in attack_switch.keys():
                    if attack_switch[atk]:
                        # added robust prediction
                        log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                            atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                            robust_adv_utility_dict[atk],
                            robust_adv_acc_dict[atk] *
                            robust_adv_utility_dict[atk])
                print(log_str)
                logfile.write(log_str + '\n')

            # Save the model checkpoint periodically.
            if step % (10 * step_for_epoch) == 0 and (step > _global_step):
                num_examples_per_step = L
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
            """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
def train(fgsm_eps, _dp_epsilon, _attack_norm_bound, log_filename, ratio):
    FLAGS = None

    #ratio = 16
    #target_eps = [0.125,0.25,0.5,1,2,4,8]
    #target_eps = [0.25 + 0.25*ratio]
    target_eps = [0.2 + 0.2 * ratio]
    #print(target_eps[0])
    #fgsm_eps = 0.1
    dp_epsilon = _dp_epsilon
    image_size = 28
    _log_filename = log_filename + str(target_eps[0]) + '_fgsm_' + str(
        fgsm_eps) + '_dpeps_' + str(dp_epsilon) + '_attack_norm_bound_' + str(
            _attack_norm_bound) + '.txt'

    clip_bound = 0.001  # 'the clip bound of the gradients'
    clip_bound_2 = 1 / 1.5  # 'the clip bound for r_kM'

    small_num = 1e-5  # 'a small number'
    large_num = 1e5  # 'a large number'
    num_images = 50000  # 'number of images N'

    batch_size = 125  # 'batch_size L'
    sample_rate = batch_size / 50000  # 'sample rate q = L / N'
    # 900 epochs
    num_steps = 1800000  # 'number of steps T = E * N / L = E / q'
    num_epoch = 24  # 'number of epoches E'

    sigma = 5  # 'sigma'
    delta = 1e-5  # 'delta'

    lambd = 1e3  # 'exponential distribution parameter'

    iterative_clip_step = 2  # 'iterative_clip_step'

    clip = 1  # 'whether to clip the gradient'
    noise = 0  # 'whether to add noise'
    redistribute = 0  # 'whether to redistribute the noise'

    D = 50000

    sess = tf.InteractiveSession()

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    W_fc1 = weight_variable([7 * 7 * 64, 25])
    b_fc1 = bias_variable([25])
    W_fc2 = weight_variable([25, 10])
    b_fc2 = bias_variable([10])

    def inference(x, dp_mult):
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        h_conv1 = tf.nn.relu((conv2d(x_image, W_conv1) + b_conv1) + dp_mult)
        h_pool1 = max_pool_2x2(h_conv1)
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        return y_conv, h_conv1

    def inference_prob(x):
        logits, _ = inference(x, 0)
        y_prob = tf.nn.softmax(logits)
        return y_prob

    shape = W_conv1.get_shape().as_list()
    w_t = tf.reshape(W_conv1, [-1, shape[-1]])
    w = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivityW = tf.reduce_max(sing_vals)
    dp_delta = 0.05
    attack_norm_bound = _attack_norm_bound
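    # Gaussian-mechanism multiplier sqrt(2 * ln(1.25 / delta)) / epsilon, scaled by
    # the attack norm bound; used below to normalize the certified robustness size.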
    dp_mult = attack_norm_bound * math.sqrt(
        2 * math.log(1.25 / dp_delta)) / dp_epsilon
    noise = tf.placeholder(tf.float32, [None, 28, 28, 32])

    #y_conv, h_conv1 = inference(x, dp_mult * noise)
    y_conv, h_conv1 = inference(x, attack_norm_bound * noise)
    softmax_y = tf.nn.softmax(y_conv)
    # Define loss and optimizer

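    # Track the cumulative (epsilon, delta) privacy spend of the noisy updates with
    # the Gaussian moments accountant; it is queried after every training step.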
    priv_accountant = accountant.GaussianMomentsAccountant(D)
    privacy_accum_op = priv_accountant.accumulate_privacy_spending(
        [None, None], sigma, batch_size)

    # sess.run(tf.initialize_all_variables())
    sess.run(tf.global_variables_initializer())

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    #train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy);
    #train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy)

    # noise redistribution #
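    # Estimate a per-unit weighting of the first feature map: take the sign of
    # d(loss)/d(h_conv1), average it over the batch, take the absolute value, and
    # rescale so the entries sum to 256 * 32. This vector (grad_redis below) later
    # redistributes the injected noise across units.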
    grad, = tf.gradients(cross_entropy, h_conv1)
    normalized_grad = tf.sign(grad)
    normalized_grad = tf.stop_gradient(normalized_grad)
    normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0)))
    #print(normalized_grad_r)
    sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False)
    #print(sum_r)
    normalized_grad_r = 256 * 32 * normalized_grad_r / sum_r
    print(normalized_grad_r)

    shape_grad = normalized_grad_r.get_shape().as_list()
    grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]])
    g = tf.transpose(grad_t)
    sing_g_vals = tf.svd(g, compute_uv=False)
    sensitivity_2 = tf.reduce_max(sing_g_vals)
    ########################

    opt = GradientDescentOptimizer(learning_rate=1e-1)

    # compute gradient
    gw_W1 = tf.gradients(cross_entropy, W_conv1)[0]  # gradient of W1
    gb1 = tf.gradients(cross_entropy, b_conv1)[0]  # gradient of b1

    gw_W2 = tf.gradients(cross_entropy, W_conv2)[0]  # gradient of W2
    gb2 = tf.gradients(cross_entropy, b_conv2)[0]  # gradient of b2

    gw_Wf1 = tf.gradients(cross_entropy, W_fc1)[0]  # gradient of W_fc1
    gbf1 = tf.gradients(cross_entropy, b_fc1)[0]  # gradient of b_fc1

    gw_Wf2 = tf.gradients(cross_entropy, W_fc2)[0]  # gradient of W_fc2
    gbf2 = tf.gradients(cross_entropy, b_fc2)[0]  # gradient of b_fc2

    # clip gradient
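    # Only the weight gradients are clipped to L2 norm clip_bound; the bias
    # gradients (gb1, gb2, gbf1, gbf2) are left unclipped.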
    gw_W1 = tf.clip_by_norm(gw_W1, clip_bound)
    gw_W2 = tf.clip_by_norm(gw_W2, clip_bound)
    gw_Wf1 = tf.clip_by_norm(gw_Wf1, clip_bound)
    gw_Wf2 = tf.clip_by_norm(gw_Wf2, clip_bound)

    # sigma = FLAGS.sigma # when comp_eps(lmbda,q,sigma,T,delta)==epsilon

    # sensitivity = 2 * FLAGS.clip_bound #adjacency matrix with one tuple different
    sensitivity = clip_bound  # adjacency matrix with one more tuple

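    # Perturb every clipped gradient with independent Gaussian noise before the
    # update (DP-SGD style); note that the stddev argument is (sigma * sensitivity)**2
    # exactly as written in the original code.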
    gw_W1 += tf.random_normal(shape=tf.shape(gw_W1),
                              mean=0.0,
                              stddev=(sigma * sensitivity)**2,
                              dtype=tf.float32)
    gb1 += tf.random_normal(shape=tf.shape(gb1),
                            mean=0.0,
                            stddev=(sigma * sensitivity)**2,
                            dtype=tf.float32)
    gw_W2 += tf.random_normal(shape=tf.shape(gw_W2),
                              mean=0.0,
                              stddev=(sigma * sensitivity)**2,
                              dtype=tf.float32)
    gb2 += tf.random_normal(shape=tf.shape(gb2),
                            mean=0.0,
                            stddev=(sigma * sensitivity)**2,
                            dtype=tf.float32)
    gw_Wf1 += tf.random_normal(shape=tf.shape(gw_Wf1),
                               mean=0.0,
                               stddev=(sigma * sensitivity)**2,
                               dtype=tf.float32)
    gbf1 += tf.random_normal(shape=tf.shape(gbf1),
                             mean=0.0,
                             stddev=(sigma * sensitivity)**2,
                             dtype=tf.float32)
    gw_Wf2 += tf.random_normal(shape=tf.shape(gw_Wf2),
                               mean=0.0,
                               stddev=(sigma * sensitivity)**2,
                               dtype=tf.float32)
    gbf2 += tf.random_normal(shape=tf.shape(gbf2),
                             mean=0.0,
                             stddev=(sigma * sensitivity)**2,
                             dtype=tf.float32)

    train_step = opt.apply_gradients([(gw_W1, W_conv1), (gb1, b_conv1),
                                      (gw_W2, W_conv2), (gb2, b_conv2),
                                      (gw_Wf1, W_fc1), (gbf1, b_fc1),
                                      (gw_Wf2, W_fc2), (gbf2, b_fc2)])

    # craft adversarial samples from x for testing
    #softmax_y_test = tf.nn.softmax(y_conv)

    #====================== attack =========================

    attack_switch = {
        'fgsm': True,
        'ifgsm': True,
        'deepfool': False,
        'mim': True,
        'spsa': False,
        'cwl2': False,
        'madry': True,
        'stm': False
    }

    # define cleverhans abstract models for using cleverhans attacks
    ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                           output_layer='logits')
    ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                          output_layer='probs')

    # define each attack method's tensor
    attack_tensor_dict = {}
    # FastGradientMethod
    if attack_switch['fgsm']:
        print('creating attack tensor of FastGradientMethod')
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                            eps=fgsm_eps,
                                            clip_min=0.0,
                                            clip_max=1.0)  # testing now
        attack_tensor_dict['fgsm'] = x_adv_test_fgsm

    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # default: eps_iter=0.05, nb_iter=10
    if attack_switch['ifgsm']:
        print('creating attack tensor of BasicIterativeMethod')
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 10,
                                              nb_iter=10,
                                              clip_min=0.0,
                                              clip_max=1.0)
        attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

    # MomentumIterativeMethod
    # default: eps_iter=0.06, nb_iter=10
    if attack_switch['mim']:
        print('creating attack tensor of MomentumIterativeMethod')
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        x_adv_test_mim = mim_obj.generate(x=x,
                                          eps=fgsm_eps,
                                          eps_iter=fgsm_eps / 10,
                                          nb_iter=10,
                                          decay_factor=1.0,
                                          clip_min=0.0,
                                          clip_max=1.0)
        attack_tensor_dict['mim'] = x_adv_test_mim

    # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
    # default: eps_iter=0.01, nb_iter=40
    if attack_switch['madry']:
        print('creating attack tensor of MadryEtAl')
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        x_adv_test_madry = madry_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 10,
                                              nb_iter=10,
                                              clip_min=0.0,
                                              clip_max=1.0)
        attack_tensor_dict['madry'] = x_adv_test_madry

    #====================== attack =========================

    #Define the correct prediction and accuracy#
    correct_prediction_x = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32))

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

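    # Calibrate the base noise scale sigmaEGM from dp_epsilon; the formula
    # sqrt(2) * (sqrt(s) + sqrt(s + eps)) / (2 * eps), with s = ln(sqrt(2/pi) * 1e5),
    # appears to follow an analytic-Gaussian-mechanism style calibration.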
    s = math.log(sqrt(2.0 / math.pi) * 1e+5)
    sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) +
                                  sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
    print(sigmaEGM)
    __noiseE = np.random.normal(0.0, sigmaEGM**2,
                                28 * 28 * 32).astype(np.float32)
    __noiseE = np.reshape(__noiseE, [-1, 28, 28, 32])

    start_time = time.time()
    logfile = open(_log_filename, 'w')
    last_eval_time = -1
    accum_time = 0
    accum_epoch = 0
    max_benign_acc = -1
    max_adv_acc_dict = {}
    test_size = len(mnist.test.images)
    print("Computing The Noise Redistribution Vector")
    for i in range(4000):
        batch = mnist.train.next_batch(batch_size)
        sess.run([train_step],
                 feed_dict={
                     x: batch[0],
                     y_: batch[1],
                     keep_prob: 0.5,
                     noise: __noiseE * 0
                 })
    batch = mnist.train.next_batch(batch_size * 10)
    grad_redis = sess.run([normalized_grad_r],
                          feed_dict={
                              x: batch[0],
                              y_: batch[1],
                              keep_prob: 1.0,
                              noise: __noiseE * 0
                          })
    #print(grad_redis)
    _sensitivity_2 = sess.run([sensitivity_2],
                              feed_dict={
                                  x: batch[0],
                                  y_: batch[1],
                                  keep_prob: 1.0,
                                  noise: __noiseE * 0
                              })
    #print(_sensitivity_2)

    _sensitivityW = sess.run(sensitivityW)
    #print(_sensitivityW)
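    # Heterogeneous noise: rescale the base sigma by sensitivityW / sqrt(sensitivity_2)
    # and weight the sampled noise elementwise by the redistribution vector grad_redis
    # computed above.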
    Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0])
    #print(Delta_redis)
    sigmaHGM = sqrt(2.0) * Delta_redis * (sqrt(s) + sqrt(s + dp_epsilon)) / (
        2.0 * dp_epsilon)
    #print(sigmaHGM)
    __noiseH = np.random.normal(0.0, sigmaHGM**2,
                                28 * 28 * 32).astype(np.float32)
    __noiseH = np.reshape(__noiseH, [-1, 28, 28, 32]) * grad_redis

    sess.run(tf.global_variables_initializer())
    print("Training")
    for i in range(num_steps):
        batch = mnist.train.next_batch(batch_size)
        sess.run(
            [train_step],
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5,
                noise: (__noiseE + __noiseH) / 2
            })
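        # Accumulate the privacy spending after each step and stop training as soon
        # as the spent delta for any target epsilon reaches the allowed delta.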
        sess.run([privacy_accum_op])
        spent_eps_deltas = priv_accountant.get_privacy_spent(
            sess, target_eps=target_eps)
        if i % 1000 == 0:
            print(i, spent_eps_deltas)
        _break = False
        for _eps, _delta in spent_eps_deltas:
            if _delta >= delta:
                _break = True
                break
        if _break == True:
            break
    print("Testing")
    benign_acc = accuracy_x.eval(
        feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0,
            noise: (__noiseE + __noiseH) / 2
        })
    ### PixelDP Robustness ###
    adv_acc_dict = {}
    robust_adv_acc_dict = {}
    robust_adv_utility_dict = {}
    for atk in attack_switch.keys():
        if atk not in adv_acc_dict:
            adv_acc_dict[atk] = -1
            robust_adv_acc_dict[atk] = -1
            robust_adv_utility_dict[atk] = -1

        if attack_switch[atk]:
            adv_images_dict = sess.run(attack_tensor_dict[atk],
                                       feed_dict={
                                           x: mnist.test.images,
                                           y_: mnist.test.labels,
                                           keep_prob: 1.0
                                       })
            #grad_redis = sess.run([normalized_grad_r], feed_dict={x: adv_images_dict, y_: mnist.test.labels, keep_prob: 1.0, noise:__noise})
            ### Robustness ###
            predictions_form_argmax = np.zeros([test_size, 10])
            softmax_predictions = softmax_y.eval(
                feed_dict={
                    x: adv_images_dict,
                    keep_prob: 1.0,
                    noise: (__noiseE + __noiseH) / 2
                })
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
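            # Monte-Carlo certification: 2000 noisy forward passes vote on a label;
            # robustness_size_argmax then converts the vote counts into a certified
            # radius, and an example counts as robust if that radius >= fgsm_eps.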
            for n_draws in range(0, 2000):
                if n_draws % 1000 == 0:
                    print(n_draws)
                _noiseE = np.random.normal(0.0, sigmaEGM**2,
                                           28 * 28 * 32).astype(np.float32)
                _noiseE = np.reshape(_noiseE, [-1, 28, 28, 32])
                _noise = np.random.normal(0.0, sigmaHGM**2,
                                          28 * 28 * 32).astype(np.float32)
                _noise = np.reshape(_noise, [-1, 28, 28, 32]) * grad_redis
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_form_argmax[j, pred] += 1
                softmax_predictions = softmax_y.eval(
                    feed_dict={
                        x: adv_images_dict,
                        keep_prob: 1.0,
                        noise: (__noiseE + __noiseH) / 2 +
                        (_noiseE + _noise) / 4
                    })
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax
            is_correct = []
            is_robust = []
            for j in range(test_size):
                is_correct.append(
                    np.argmax(mnist.test.labels[j]) == np.argmax(
                        final_predictions[j]))
                robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                    counts=predictions_form_argmax[j],
                    eta=0.05,
                    dp_attack_size=fgsm_eps,
                    dp_epsilon=dp_epsilon,
                    dp_delta=1e-5,
                    dp_mechanism='gaussian') / dp_mult
                is_robust.append(robustness_from_argmax >= fgsm_eps)
            adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_size
            robust_adv_acc_dict[atk] = np.sum([
                a and b for a, b in zip(is_robust, is_correct)
            ]) * 1.0 / np.sum(is_robust)
            robust_adv_utility_dict[atk] = np.sum(is_robust) * 1.0 / test_size
            print(" {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                robust_adv_utility_dict[atk],
                robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]))
            ##############################
    log_str = "step: {}\t target_epsilon: {}\t dp_epsilon: {:.1f}\t attack_norm_bound: {:.1f}\t benign_acc: {:.4f}\t".format(
        i, target_eps, dp_epsilon, attack_norm_bound, benign_acc)
    for atk in attack_switch.keys():
        if attack_switch[atk]:
            log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                robust_adv_utility_dict[atk],
                robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
    print(log_str)
    logfile.write(log_str + '\n')
    ##############################
    duration = time.time() - start_time
    logfile.write(str(duration) + '\n')
    logfile.flush()
    logfile.close()
def mnist_attack(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 viz_enabled=True,
                 nb_epochs=6,
                 batch_size=128,
                 nb_filters=64,
                 nb_samples=10,
                 learning_rate=0.001,
                 eps=0.3,
                 attack=0,
                 attack_iterations=100,
                 model_path=None,
                 targeted=False,
                 binary=False,
                 scale=False,
                 rand=False,
                 debug=None,
                 test=False,
                 data_dir=None,
                 delay=0,
                 adv=0,
                 nb_iter=40):
    """
    MNIST tutorial for generic attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1237)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    if debug:
        set_log_level(logging.DEBUG)
    else:
        set_log_level(logging.WARNING)  # for running on sharcnet

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(datadir=data_dir,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name='phase')

    # for attempting to break unscaled network.
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    save = False
    train_from_scratch = False
    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    # Define TF model graph
    if binary:
        print('binary=True')
        if scale:
            print('scale=True')
            if rand:
                print('rand=True')
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
                model = make_scaled_binary_rand_cnn(
                    phase,
                    logits_scalar,
                    'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
            else:
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
                model = make_scaled_binary_cnn(phase,
                                               logits_scalar,
                                               'binsc_',
                                               input_shape=(None, img_rows,
                                                            img_cols,
                                                            channels),
                                               nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bin_',
                                          nb_filters=nb_filters)
    else:
        if rand:
            print('rand=True')
            from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn
            model = make_scaled_rand_cnn(phase,
                                         logits_scalar,
                                         'fp_rand',
                                         nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_cnn
            model = make_basic_cnn(phase,
                                   logits_scalar,
                                   'fp_',
                                   nb_filters=nb_filters)

    preds = model(x, reuse=False)  # * logits_scalar
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    rng = np.random.RandomState([2017, 8, 30])

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

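    # When adversarial training is enabled, build one attack tensor for training
    # (adv_x_train) and a separate fixed-epsilon tensor for evaluation (adv_x_eval).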
    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    def evaluate():
        # Evaluate the accuracy of the MNIST model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

        if adv != 0:
            # Accuracy of the adversarially trained model on adversarial
            # examples
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % acc)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params,
                             feed={logits_scalar: ATTACK_T})
            print('Test accuracy on adversarial examples (scaled): %0.4f' %
                  acc)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if test:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
        else:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            if test:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            evaluate=evaluate,
                            args=train_params,
                            save=save,
                            rng=rng)
            else:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            args=train_params,
                            save=save,
                            rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          preds,
                          X_test,
                          Y_test,
                          phase=phase,
                          feed={phase: False},
                          args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Build dataset
    ###########################################################################
    if viz_enabled:
        assert nb_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        viz_rows = nb_classes if targeted else 2
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        if viz_enabled:
            from cleverhans.utils import grid_visual
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, idxs, nb_classes, img_rows, img_cols, channels)
        else:
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
                img_cols, channels)
    else:
        if viz_enabled:
            from cleverhans.utils import pair_visual
            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

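    # Dispatch on the requested attack and set its per-attack parameters.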
    if attack == ATTACK_CARLINI_WAGNER_L2:
        print('Attack: CarliniWagnerL2')
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        print('Attack: SaliencyMapMethod')
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        print('Attack: FastGradientMethod')
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        print('Attack: MadryEtAl')
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    adv_np = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    name = 'm_fgsm_eps%s_n%s.npy' % (eps, nb_samples)
    fpath = os.path.join(
        '/scratch/gallowaa/mnist/adversarial_examples/cleverhans/', name)
    np.savez(fpath, x=adv_np, y=Y_test[:nb_samples])
    '''
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    adv_np, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                         phase: False}, args=eval_params)
    '''
    eval_params = {'batch_size': att_batch_size}
    if targeted:
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_np,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)

    else:
        print("Evaluating untargeted results")
        if viz_enabled:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[idxs],
                                      phase=phase,
                                      args=eval_params)
        else:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[:nb_samples],
                                      phase=phase,
                                      args=eval_params)

    if viz_enabled:
        n = nb_classes - 1
        for i in range(nb_classes):
            if targeted:
                for j in range(nb_classes):
                    if i != j:
                        if j != 0 and i != n:
                            grid_viz_data[i, j] = adv_np[j * n + i]
                        if j == 0 and i > 0 or i == n and j > 0:
                            grid_viz_data[i, j] = adv_np[j * n + i - 1]
                    else:
                        grid_viz_data[i, j] = adv_inputs[j * n]
            else:
                grid_viz_data[i, 0] = adv_inputs[i]
                grid_viz_data[i, 1] = adv_np[i]
        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Compute number of modified features (L_0 norm)
    nb_changed = np.where(adv_np != adv_inputs)[0].shape[0]
    percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0])

    # Compute the average distortion introduced by the algorithm
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturb))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f}'.format(accuracy))
    print('{0:.4f}'.format(adv_accuracy))
    print('{0:.4f}'.format(percent_perturbed))
    print('{0:.4f}'.format(percent_perturb))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter

    #### EMPIR extra flags
    lowprecision = FLAGS.lowprecision
    abits = FLAGS.abits
    wbits = FLAGS.wbits
    abitsList = FLAGS.abitsList
    wbitsList = FLAGS.wbitsList
    stocRound = True if FLAGS.stocRound else False
    rand = FLAGS.rand
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree = True if FLAGS.ensembleThree else False
    abits2 = FLAGS.abits2
    wbits2 = FLAGS.wbits2
    abits2List = FLAGS.abits2List
    wbits2List = FLAGS.wbits2List
    inpgradreg = True if FLAGS.inpgradreg else False
    distill = True if FLAGS.distill else False
    student_epochs = FLAGS.student_epochs
    l2dbl = FLAGS.l2dbl
    l2cs = FLAGS.l2cs
    ####

    save = False
    train_from_scratch = False

    if ensembleThree:
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, batch_size,
                                                   nb_filters, learning_rate,
                                                   nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

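    # Validate and normalize the precision flags: each model either uses a single
    # fixed precision (wbits/abits, wbits2/abits2) or a per-layer list covering the
    # three convolutional layers.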
    if ensembleThree:
        if (wbitsList is None) or (
                abitsList is None
        ):  # Layer wise separate quantization not specified for first model
            if (wbits == 0) or (abits == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers for the first model have to specified using wbits1 and abits1 flags"
                )
                sys.exit(1)
            else:
                fixedPrec1 = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers of the first model"
            )
            sys.exit(1)
        else:
            fixedPrec1 = 0

        if (wbits2List is None) or (
                abits2List is None
        ):  # Layer wise separate quantization not specified for second model
            if (wbits2 == 0) or (abits2 == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers for the second model have to specified using wbits1 and abits1 flags"
                )
                sys.exit(1)
            else:
                fixedPrec2 = 1
        elif (len(wbits2List) != 3) or (len(abits2List) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers of the second model"
            )
            sys.exit(1)
        else:
            fixedPrec2 = 0

        if (fixedPrec2 != 1) or (
                fixedPrec1 != 1
        ):  # At least one of the models has separate precisions per layer
            fixedPrec = 0
            print("Within atleast one model has separate precisions")
            if (fixedPrec1 == 1):  # first model has fixed precision
                abitsList = (abits, abits, abits)
                wbitsList = (wbits, wbits, wbits)
            if (fixedPrec2 == 1):  # second model has fixed precision
                abits2List = (abits2, abits2, abits2)
                wbits2List = (wbits2, wbits2, wbits2)
        else:
            fixedPrec = 1

        if (train_from_scratch):
            print("The ensemble model cannot be trained from scratch")
            sys.exit(1)
        if fixedPrec == 1:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn
            model = make_ensemble_three_cifar_cnn(phase,
                                                  logits_scalar,
                                                  'lp1_',
                                                  'lp2_',
                                                  'fp_',
                                                  wbits,
                                                  abits,
                                                  wbits2,
                                                  abits2,
                                                  input_shape=(None, img_rows,
                                                               img_cols,
                                                               channels),
                                                  nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn_layerwise
            model = make_ensemble_three_cifar_cnn_layerwise(
                phase,
                logits_scalar,
                'lp1_',
                'lp2_',
                'fp_',
                wbitsList,
                abitsList,
                wbits2List,
                abits2List,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
    elif lowprecision:
        if (wbitsList is None) or (
                abitsList is
                None):  # Layer wise separate quantization not specified
            if (wbits == 0) or (abits == 0):
                print(
                    "Error: the number of bits for constant precision weights and activations across layers have to specified using wbits and abits flags"
                )
                sys.exit(1)
            else:
                fixedPrec = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print(
                "Error: Need to specify the precisions for activations and weights for the atleast the three convolutional layers"
            )
            sys.exit(1)
        else:
            fixedPrec = 0

        if fixedPrec:
            from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_cifar_cnn
            model = make_basic_lowprecision_cifar_cnn(
                phase,
                logits_scalar,
                'lp_',
                wbits,
                abits,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters,
                stocRound=stocRound)
        else:
            from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_cifar_cnn
            model = make_layerwise_lowprecision_cifar_cnn(
                phase,
                logits_scalar,
                'lp_',
                wbitsList,
                abitsList,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters,
                stocRound=stocRound)
    elif distill:
        from cleverhans_tutorials.tutorial_models import make_distilled_cifar_cnn
        model = make_distilled_cifar_cnn(phase,
                                         logits_scalar,
                                         'teacher_fp_',
                                         'fp_',
                                         nb_filters=nb_filters,
                                         input_shape=(None, img_rows, img_cols,
                                                      channels))
    ####
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cifar_cnn
        model = make_basic_cifar_cnn(phase,
                                     logits_scalar,
                                     'fp_',
                                     input_shape=(None, img_rows, img_cols,
                                                  channels),
                                     nb_filters=nb_filters)

    # separate predictions of teacher for distilled training
    if distill:
        teacher_preds = model.teacher_call(x, reuse=False)
        teacher_logits = model.get_teacher_logits(x, reuse=False)

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:
        ##default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble(sess,
                                      x,
                                      y,
                                      preds,
                                      X_test,
                                      Y_test,
                                      phase=phase,
                                      args=eval_params)
        else:
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar
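
    # A generic sketch (an assumption, not model_train's exact weighting) of how
    # adversarial training typically mixes the clean and adversarial cross-entropy
    # terms; it only illustrates what preds_adv_train is used for.
    def _adv_training_loss_sketch(y_onehot, preds_clean, preds_adv, adv_weight=0.5):
        ce_clean = -tf.reduce_mean(
            tf.reduce_sum(y_onehot * tf.log(preds_clean + 1e-12), axis=1))
        ce_adv = -tf.reduce_mean(
            tf.reduce_sum(y_onehot * tf.log(preds_adv + 1e-12), axis=1))
        return (1. - adv_weight) * ce_clean + adv_weight * ce_adv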

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if distill:
            temperature = 10  # a temperature of 1 means the teacher predictions are used as is
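            # Minimal sketch (assumed helper, not used by the training calls below):
            # distillation softens the teacher logits with softmax(logits / temperature).
            def _softened_teacher_probs(logits, T=temperature):
                return tf.nn.softmax(logits / T)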
            teacher_scaled_preds_val = model_train_teacher(sess,
                                                           x,
                                                           y,
                                                           teacher_preds,
                                                           teacher_logits,
                                                           temperature,
                                                           X_train,
                                                           Y_train,
                                                           phase=phase,
                                                           args=train_params,
                                                           rng=rng)
            eval_params = {'batch_size': batch_size}
            teacher_acc = model_eval(sess,
                                     x,
                                     y,
                                     teacher_preds,
                                     X_test,
                                     Y_test,
                                     phase=phase,
                                     args=eval_params)
            print(
                'Test accuracy of the teacher model on legitimate examples: %0.4f'
                % teacher_acc)
            print('Training the student model...')
            student_train_params = {
                'nb_epochs': student_epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'loss_name': 'train loss',
                'filename': 'model',
                'reuse_global_step': False,
                'train_scope': 'train',
                'is_training': True
            }
            if save:
                student_train_params.update({'log_dir': model_path})
            y_teacher = tf.placeholder(tf.float32, shape=(None, nb_classes))
            model_train_student(sess,
                                x,
                                y,
                                preds,
                                temperature,
                                X_train,
                                Y_train,
                                y_teacher=y_teacher,
                                teacher_preds=teacher_scaled_preds_val,
                                alpha=0.3,
                                beta=0.7,
                                phase=phase,
                                evaluate=evaluate,
                                args=student_train_params,
                                save=save,
                                rng=rng)
        elif inpgradreg:
            model_train_inpgrad_reg(sess,
                                    x,
                                    y,
                                    preds,
                                    X_train,
                                    Y_train,
                                    phase=phase,
                                    evaluate=evaluate,
                                    l2dbl=l2dbl,
                                    l2cs=l2cs,
                                    args=train_params,
                                    save=save,
                                    rng=rng)
        else:
            # do clean training for 'nb_epochs' or 'delay' epochs
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        predictions_adv=preds_adv_train,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)

    else:
        if ensembleThree:
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            stored_variables = [
                'lp_conv1_init/k', 'lp_conv2_init/k', 'lp_conv3_init/k',
                'lp_ip1init/W', 'lp_logits_init/W'
            ]
            variable_dict = dict(zip(stored_variables, variables[:5]))
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            variable_dict = dict(zip(stored_variables, variables[5:10]))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            stored_variables = [
                'fp_conv1_init/k', 'fp_conv2_init/k', 'fp_conv3_init/k',
                'fp_ip1init/W', 'fp_logits_init/W'
            ]
            variable_dict = dict(zip(stored_variables, variables[10:]))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
        else:
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble(sess,
                                       x,
                                       y,
                                       preds,
                                       X_test,
                                       Y_test,
                                       phase=phase,
                                       feed={phase: False},
                                       args=eval_params)
    else:
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds,
                              X_test,
                              Y_test,
                              phase=phase,
                              feed={phase: False},
                              args=eval_params)

    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
            img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model,
                                   back='tf',
                                   model_type=model_type,
                                   num_classes=nb_classes,
                                   sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model,
                                     back='tf',
                                     model_type=model_type,
                                     sess=sess,
                                     num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model,
                                      back='tf',
                                      model_type=model_type,
                                      sess=sess,
                                      num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model,
                             back='tf',
                             model_type=model_type,
                             sess=sess,
                             num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model,
                                        back='tf',
                                        sess=sess,
                                        model_type=model_type,
                                        num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': att_batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  X_test_adv,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)
    else:
        # assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating un-targeted results")
        if ensembleThree:
            adv_accuracy = model_eval_ensemble(sess,
                                               x,
                                               y,
                                               preds,
                                               X_test_adv,
                                               Y_test,
                                               phase=phase,
                                               args=eval_params)
        else:  #default below
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      X_test_adv,
                                      Y_test,
                                      phase=phase,
                                      args=eval_params)

    # Report the accuracy of the model on the adversarial examples
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average L2 distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((X_test_adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()
    '''
    print("Repeating the process, using adversarial training")

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              phase=phase,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, preds_adv, X_test,
                                  Y_test, phase=phase, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    train_params.update({'reuse_global_step': True})
    model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                predictions_adv=preds_adv, evaluate=evaluate_2,
                args=train_params)
    '''
Example No. 11
def targeted(model,
             input_dim,
             sess,
             X_test,
             target,
             eps,
             n_adv,
             attack,
             multi_model=False):
    '''
    Calculates adversarial examples with the projected gradient descent method by Madry et al. (or FGSM / CWL2, depending on `attack`).
    :return: adversarial examples for X_test
    '''

    tf_model_fn = convert_pytorch_model_to_tf(model, out_dims=95)
    if multi_model:
        cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                output_layer='probs')
    else:
        cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                output_layer='logits')
    x_op = tf.compat.v1.placeholder(tf.float32, shape=(None, input_dim[1]))
    if attack == 'PGD':
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {
            'eps': eps,
            'y_target': target,
            'clip_min': -1,
            'clip_max': 1
        }
    elif attack == 'FGSM':
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {
            'eps': eps,
            'y_target': target,
            'clip_min': -1,
            'clip_max': 1
        }
    elif attack == 'CWL2':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = {'max_iterations': 100, 'clip_min': -1, 'clip_max': 1}

    else:
        raise ValueError('[+] Attack not supported')

    if not os.path.exists('/root/asr-python/src/tmp2'):
        os.makedirs('/root/asr-python/src/tmp2')

    adv_x_op = attack_op.generate(x_op, **attack_params)
    m = input_dim[0]  # minibatch size
    adv_x = np.zeros([m, input_dim[1]])
    adv_samples = n_adv
    single_advs = []
    for i in range(adv_samples):
        single_adv = sess.run(adv_x_op, feed_dict={x_op: X_test})
        single_advs.append(single_adv)
        # np.save(Path('/root/asr-python/src/tmp2', f'''{i}.npy'''), single_adv)
        adv_x += (1 / adv_samples) * single_adv

    # np.save(Path('/root/asr-python/src/tmp2', f'''combined.npy'''), adv_x)

    # adv_x
    return adv_x, np.array(single_advs)
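
# A minimal usage sketch for targeted() with hypothetical names (`net`, `x_eval`,
# `y_target_onehot` are assumptions, not defined in this example):
def _targeted_usage_sketch(net, sess, x_eval, y_target_onehot):
    # average 8 targeted PGD runs at eps = 0.1 and also return the individual runs
    adv_mean, adv_all = targeted(net, x_eval.shape, sess, x_eval, y_target_onehot,
                                 eps=0.1, n_adv=8, attack='PGD', multi_model=False)
    return adv_mean, adv_all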
Example No. 12
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio))
    #############################
    ##Hyper-parameter Setting####
    #############################
    hk = 256; #number of hidden units at the last layer
    Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer
    Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer
    Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer
    D = 50000; #size of the dataset
    L = 2499; #batch size
    image_size = 28;
    padding = 4;
    #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units
    #gen_ratio = 1
    epsilon1 = 0.0; #0.175; #epsilon for dpLRP
    epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer
    epsilon3 = 0.1*(1); #epsilon for the last hidden layer
    total_eps = epsilon1 + epsilon2 + epsilon3
    print(total_eps)
    uncert = 0.1; #uncertainty modeling at the output layer
    infl = 1; #inflation rate in the privacy budget redistribution
    R_lowerbound = 1e-5; #lower bound of the LRP
    c = [0, 40, 50, 200] #norm bounds
    epochs = 200; #number of epochs
    preT_epochs = 50; #number of epochs
    T = int(D/L*epochs + 1); #number of steps T
    pre_T = int(D/L*preT_epochs + 1);
    step_for_epoch = int(D/L); #number of steps for one epoch
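    # e.g. with D = 50000, L = 2499 and epochs = 200: step_for_epoch = int(50000/2499) = 20,
    # T = int(50000/2499 * 200 + 1) = 4002 training steps and pre_T = int(50000/2499 * 50 + 1) = 1001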
    
    broken_ratio = 1
    #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10]
    #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2)
    #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2)
    
    #fgsm_eps = 0.1
    rand_alpha = 0.05
    
    ##Robustness##
    robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2;
    ####
    
    LRPfile = os.getcwd() + '/Relevance_R_0_075.txt';
    #############################
    mnist = input_data.read_data_sets("MNIST_data/", one_hot = True);

    #############################
    ##Construct the Model########
    #############################
    #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 for the output layer#

    #Compute the 1/|L| * Delta3 for the last hidden layer#
    """eps3_ratio = Delta3_adv/Delta3_benign;
    eps3_benign = 1/(1+eps3_ratio)*(epsilon3)
    eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)"""
    loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L);
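    # e.g. with hk = 256, epsilon3 = 0.1 and L = 2499: scale3_benign = scale3_adv
    # = 2*256 / (0.1 * 2499), i.e. a Laplace scale of about 2.05 per output-layer coefficient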
    ###
    #End Step 4#
    # Parameters Declarification
    W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]);
    b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]);

    shape     = W_conv1.get_shape().as_list()
    w_t       = tf.reshape(W_conv1, [-1, shape[-1]])
    w         = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2*(14*14 + 2)*25/(L*sensitivity)
    
    dp_epsilon=1.0 #0.1
    delta_r = fgsm_eps*(image_size**2);
    #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
    #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
    
    W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]);
    b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]);

    W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]);
    b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]);

    W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]);
    b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]);

    """scale2 = tf.Variable(tf.ones([hk]))
    beta2 = tf.Variable(tf.zeros([hk]))
    tf.add_to_collections([CONV_VARIABLES], scale2)
    tf.add_to_collections([CONV_VARIABLES], beta2)"""

    params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2]
    ###


    #Step 5: Create the model#
    noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);
    adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);

    keep_prob = tf.placeholder(tf.float32);
    x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    x_image = tf.reshape(x, [-1,image_size,image_size,1]);

    #perturbFMx = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]);

    # pretrain ###
    #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise)
    ###########

    adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]);

    #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]);

    # pretrain adv ###
    #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32)
    #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);
    FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]);
    Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h)
    Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h)
    ###########
    
    x_image += noise;
    x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature.
    
    adv_image += adv_noise;
    adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature.

    #perturbFM = np.random.laplace(0.0, scale3_benign, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10)
    perturbFM = np.reshape(perturbFM, [hk, 10]);
    
    y_conv = inference(x_image, perturbFM, hk, FM_h, params);
    softmax_y_conv = tf.nn.softmax(y_conv)
    #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T)

    #perturbFM = np.random.laplace(0.0, scale3_adv, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params);
    #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T)

    # test model
    perturbFM_test = np.random.laplace(0.0, 0, hk)
    perturbFM_test = np.reshape(perturbFM_test, [hk]);
    x_test = tf.reshape(x, [-1,image_size,image_size,1]);
    y_test = inference(x_test, perturbFM_test, hk, FM_h, params);
    #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T)

    #Define a place holder for the output label#
    y_ = tf.placeholder(tf.float32, [None, 10]);
    adv_y_ = tf.placeholder(tf.float32, [None, 10]);
    #End Step 5#
    #############################

    #############################
    ##Define loss and Optimizer##
    #############################
    '''
        Computes differentially private sigmoid cross entropy given `logits`.
        
        Measures the probability error in discrete classification tasks in which each
        class is independent and not mutually exclusive.
        
        For brevity, let `x = logits`, `z = labels`.  The logistic loss is
        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
        = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
        = (1 - z) * x + log(1 + exp(-x))
        = x - x * z + log(1 + exp(-x))
        
        For x < 0, to avoid overflow in exp(-x), we reformulate the above
        
        x - x * z + log(1 + exp(-x))
        = log(exp(x)) - x * z + log(1 + exp(-x))
        = - x * z + log(1 + exp(x))
        
        Hence, to ensure stability and avoid overflow, the implementation uses this
        equivalent formulation
        
        max(x, 0) - x * z + log(1 + exp(-abs(x)))
        
        `logits` and `labels` must have the same type and shape. Let neg_abs_logits denote -abs(y_conv) = -abs(h_fc1 * W_fc2). By applying a Taylor expansion, we have:
        
        Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv)));
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2)
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2)
        = F1 + F2
        where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2
        
        To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2.
        Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data.
        Therefore, F1 should be differentially private. We need to preserve DP in F2, which reads the groundtruth label y_, as follows:
        
        By applying the Functional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2):
        
        perturbFM = np.random.laplace(0.0, scale3, hk * 10)
        perturbFM = np.reshape(perturbFM/L, [hk, 10]);
        
        where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3);
        
        To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow].
        
        Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow
    '''
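    # Quick numerical sanity check (a sketch, not part of the original training graph)
    # of the 2nd-order Taylor expansion used below: for z = -|x|,
    # log(1 + exp(z)) is approximated by log(2) + z/2 + z**2/8.
    def _taylor_softplus_check(x_val=0.5):
        z = -abs(x_val)
        exact = math.log(1.0 + math.exp(z))                        # 0.4741... for x_val = 0.5
        approx = math.log(2.0) + 0.5 * z + (1.0 / 8.0) * z ** 2    # 0.4744...
        return exact, approx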
    ### Taylor for benign x
    zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype)
    cond = (y_conv >= zeros)
    relu_logits = array_ops.where(cond, y_conv, zeros)
    neg_abs_logits = array_ops.where(cond, -y_conv, y_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_benign = tf.abs(y_conv - y_)

    ### Taylor for adv_x
    zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype)
    cond_adv = (y_adv_conv >= zeros_adv)
    relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv)
    neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_adv = tf.abs(y_adv_conv - adv_y_)

    ### Adversarial training loss
    adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv)

    '''Sometimes, using learning-rate decay can help stabilize the training process. However, use it carefully, since it may affect the convergence speed.'''
    global_step = tf.Variable(0, trainable=False)
    pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
    train_var_list = tf.get_collection(CONV_VARIABLES)
    #print(pretrain_var_list)
    #print(train_var_list)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list);
        train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list);
    sess = tf.InteractiveSession();

    # Define the correct prediction and accuracy
    # This needs to be changed to "Robust Prediction"
    correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1));
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32));

    #############
    # use these to get predictions wrt to robust conditions
    """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32))
    accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask)
    #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))))
    certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))"""
    #############

    # craft adversarial samples from x for training
    dynamic_eps = tf.placeholder(tf.float32);
    emsemble_L = int(L/3)
    softmax_y = tf.nn.softmax(y_test)
    #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0)
    c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM
    x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]);

    #====================== attack =========================
    #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
    #other possible attacks:
        # ElasticNetMethod
        # FastFeatureAdversaries
        # LBFGS
        # SaliencyMapMethod
        # VirtualAdversarialMethod

    # y_test = logits (before softmax)
    # softmax_y_test = preds (probs, after softmax)
    softmax_y_test = tf.nn.softmax(y_test)

    # create saver
    saver = tf.train.Saver(tf.all_variables())
    
    sess.run(W_conv1.initializer)
    _gamma = sess.run(gamma)
    _gamma_x = Delta2/L
    epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
    print(epsilon2_update/_gamma + epsilon2_update/_gamma_x)
    print(epsilon2_update)
    _sensitivityW = sess.run(sensitivity)
    delta_h = _sensitivityW*(14**2)
    dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
    #############################
    
    iterativeStep = 100
    
    # load the most recent models
    _global_step = 0
    ckpt = tf.train.get_checkpoint_state(os.getcwd() + '/tmp/train')
    if ckpt and ckpt.model_checkpoint_path:
        print(ckpt.model_checkpoint_path);
        saver.restore(sess, ckpt.model_checkpoint_path)
        _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
        print('No checkpoint file found')

    start_time = time.time();

    # adv pretrain model (Auto encoder layer)
    cost = tf.reduce_sum(Enc_Layer2.cost);
    logfile.write("pretrain: \n")
    
    # define cleverhans abstract models for using cleverhans attacks
    ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)
    ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)

    # rand+fgsm
    # if attack_switch['randfgsm']:
    #     randfgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
    #     x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0)
    #     x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0))

    # define each attack method's tensor
    mu_alpha = tf.placeholder(tf.float32, [1]);
    attack_tensor_dict = {}
    # FastGradientMethod
    if attack_switch['fgsm']:
        print('creating attack tensor of FastGradientMethod')
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
        x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now
        attack_tensor_dict['fgsm'] = x_adv_test_fgsm

    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # default: eps_iter=0.05, nb_iter=10
    if attack_switch['ifgsm']:
        print('creating attack tensor of BasicIterativeMethod')
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

    # Deepfool
    if attack_switch['deepfool']:
        print('creating attack tensor of DeepFool')
        deepfool_obj = DeepFool(model=ch_model_logits, sess=sess)
        #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['deepfool'] = x_adv_test_deepfool

    # MomentumIterativeMethod
    # default: eps_iter=0.06, nb_iter=10
    if attack_switch['mim']:
        print('creating attack tensor of MomentumIterativeMethod')
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['mim'] = x_adv_test_mim

    # SPSA
    # note: here epsilon is the L-infinity norm bound rather than a percentage of perturbation
    # maybe exclude this method at first, since it seems to have some constraints on the data value range
    if attack_switch['spsa']:
        print('creating attack tensor of SPSA')
        spsa_obj = SPSA(model=ch_model_logits, sess=sess)
        #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2)
        x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1)
        attack_tensor_dict['spsa'] = x_adv_test_spsa

    # CarliniWagnerL2
    # confidence=0 is from their paper
    # it is said to be slow, so maybe exclude it at first
    if attack_switch['cwl2']:
        print('creating attack tensor of CarliniWagnerL2')
        cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess)
        #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['cwl2'] = x_adv_test_cwl2

    # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
    # default: eps_iter=0.01, nb_iter=40
    if attack_switch['madry']:
        print('creating attack tensor of MadryEtAl')
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['madry'] = x_adv_test_madry

    # SpatialTransformationMethod
    # the params are pretty different from those in the paper,
    # so I use the defaults
    # excluded since there's a bug
    if attack_switch['stm']:
        print('creating attack tensor of SpatialTransformationMethod')
        stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2)
        x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6)
        attack_tensor_dict['stm'] = x_adv_test_stm
    #====================== attack =========================
    
    sess.run(tf.initialize_all_variables());

    ##perturb h for training
    perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
    perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);

    ##perturb h for testing
    perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32)
    perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]);

    '''for i in range(_global_step, _global_step + pre_T):
        d_eps = random.random();
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        """batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)"""

        batch_2 = mnist.train.next_batch(L);
        pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h});
        if i % int(5*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            logfile.write("step \t %d \t %g \n"%(i, cost_value))
            print(cost_value)

    pre_train_finish_time = time.time()
    print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))'''

    # train and test model with adv samples
    max_benign_acc = -1;
    max_robust_benign_acc = -1
    #max_adv_acc = -1;

    test_size = len(mnist.test.images)
    AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size);

    Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L);
    BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    last_eval_time = -1
    accum_time = 0
    accum_epoch = 0
    max_adv_acc_dict = {}
    max_robust_adv_acc_dict = {}
    #max_robust_adv_utility_dict = {}
    for atk in attack_switch.keys():
        if atk not in max_adv_acc_dict:
            max_adv_acc_dict[atk] = -1
            max_robust_adv_acc_dict[atk] = -1

    for i in range(_global_step, _global_step + T):
        # this batch is for generating adv samples
        batch = mnist.train.next_batch(emsemble_L); #Get a random batch.
        y_adv_batch = batch[1]
        # Print out the evaluation results every 10 epochs (skipping the very first epochs).
        if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch):
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            print(cost_value)
            
            if last_eval_time < 0:
                last_eval_time = time.time()
            #===================benign samples=====================
            predictions_form_argmax = np.zeros([test_size, 10])
            #test_bach = mnist.test.next_batch(test_size)
            softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
            for n_draws in range(0, 1):
                _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_form_argmax[j, pred] += 1;
                softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax;
            is_correct = []
            is_robust = []
            for j in range(test_size):
                is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                is_robust.append(robustness_from_argmax >= fgsm_eps)
            acc = np.sum(is_correct)*1.0/test_size
            robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
            robust_utility = np.sum(is_robust)*1.0/test_size
            max_benign_acc = max(max_benign_acc, acc)
            max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility)
            log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)
            #===================adv samples=====================
            #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps)
            """adv_images_dict = {}
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels})
            print("Done with the generating of Adversarial samples")"""
            #===================adv samples=====================
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            for atk in attack_switch.keys():
                if atk not in adv_acc_dict:
                    adv_acc_dict[atk] = -1
                    robust_adv_acc_dict[atk] = -1
                    robust_adv_utility_dict[atk] = -1
                if attack_switch[atk]:
                    adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]})
                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([test_size, 10])
                    softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h})
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(0, 2000):
                        if n_draws % 1000 == 0:
                            print(n_draws)
                        _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                        _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                        _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                        for j in range(test_size):
                            pred = argmax_predictions[j]
                            predictions_form_argmax[j, pred] += 1;
                        softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h})
                        #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h})
                        argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    final_predictions = predictions_form_argmax;
                    is_correct = []
                    is_robust = []
                    for j in range(test_size):
                        is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                        robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                        is_robust.append(robustness_from_argmax >= fgsm_eps)
                    adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size
                    robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
                    robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size
                    ##############################
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    # added robust prediction
                    log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
                    max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk])
                    max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
            print(log_str)
            logfile.write(log_str + '\n')

            # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc))
            # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc));

            # estimate end time
            """if i > 0 and i % int(10*step_for_epoch) == 0:
                current_time_interval = time.time() - last_eval_time
                last_eval_time = time.time()
                print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval)))
                accum_time += current_time_interval
                accum_epoch += 10
                estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch)
                print('estimate finish in: {}'.format(parse_time(estimate_time)))"""

            #print("step \t %d \t adversarial test accuracy \t %g"%(i, accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty})));
            """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=i);"""

        d_eps = random.random()
        y_adv = batch[1]
        adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        """for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})"""
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)
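        # ensemble adversarial training data: one batch each from ifgsm, mim and madry,
        # concatenated in the same order their labels were appended to y_adv above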
        
        batch = mnist.train.next_batch(L)  # get a random batch
        # train with benign and adv samples
        pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h})
        train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h})
    duration = time.time() - start_time
    # print(parse_time(duration)); #print running time duration#

    max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc)
    for atk in attack_switch.keys():
        if attack_switch[atk]:
            max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk])
    logfile.write(max_acc_string + '\n')
    logfile.write(str(duration) + '\n')
Example No. 13
def test(cifar10_data, checkpoint_path, epochs, L, learning_rate, scale3,
         Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps,
         parameter_dict, testing_step):
    # logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n"%(fgsm_eps, learning_rate, alpha, total_eps))
    """Train CIFAR-10 for a number of steps."""
    # make sure variables are placed on cpu
    # TODO: for AWS version, check if put variables on GPU will be better
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        attacks = ['ifgsm', 'mim', 'madry']

        # manually create all scopes
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope:
            scope_conv1 = scope
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope:
            scope_conv2 = scope
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope:
            scope_conv3 = scope
        with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope:
            scope_local4 = scope
        with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope:
            scope_local5 = scope

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        # with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])):
        with tf.variable_scope(scope_conv1) as scope:
            kernel1 = _variable_with_weight_decay(
                'kernel1',
                shape=[4, 4, 3, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[AECODER_VARIABLES])
            biases1 = _bias_on_cpu('biases1', [128],
                                   tf.constant_initializer(0.0),
                                   collect=[AECODER_VARIABLES])

        # estimate the sensitivity of the first layer from the largest singular value of kernel1
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)

        with tf.variable_scope(scope_conv2) as scope:
            kernel2 = _variable_with_weight_decay(
                'kernel2',
                shape=[5, 5, 128, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases2 = _bias_on_cpu('biases2', [128],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_conv3) as scope:
            kernel3 = _variable_with_weight_decay(
                'kernel3',
                shape=[5, 5, 256, 256],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases3 = _bias_on_cpu('biases3', [256],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local4) as scope:
            kernel4 = _variable_with_weight_decay(
                'kernel4',
                shape=[int(image_size / 4)**2 * 256, hk],
                stddev=0.04,
                wd=0.004,
                collect=[CONV_VARIABLES])
            biases4 = _bias_on_cpu('biases4', [hk],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local5) as scope:
            kernel5 = _variable_with_weight_decay(
                'kernel5', [hk, 10],
                stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
                math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases5 = _bias_on_cpu('biases5', [10],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        # group these for use as parameters
        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        scopes = [
            scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5
        ]

        # placeholders for input values
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])  # one time
        noise = tf.placeholder(tf.float32,
                               [None, image_size, image_size, 3])  # one time
        adv_noise = tf.placeholder(
            tf.float32, [None, image_size, image_size, 3])  # one time

        x = tf.placeholder(tf.float32,
                           [None, image_size, image_size, 3])  # batch of input images

        y = tf.placeholder(tf.float32,
                           [None, 10])  # batch of one-hot labels

        # benign conv output
        bi = 0
        x_image = x + noise
        # with tf.device('/gpu:0'):
        y_conv = inference(x_image,
                           FM_h,
                           params,
                           scopes,
                           training=True,
                           bn_index=bi)
        softmax_y_conv = tf.nn.softmax(y_conv)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        dp_epsilon = 1.0

        epsilon2_update = parameter_dict['epsilon2_update']

        delta_r = parameter_dict['delta_r']

        _sensitivityW = parameter_dict['_sensitivityW']

        delta_h = parameter_dict['delta_h']

        dp_mult = parameter_dict['dp_mult']

        # ============== attacks ================
        iter_step_training = parameter_dict['iter_step_training']

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            scopes=scopes,
            image_size=image_size,
            adv_noise=adv_noise)
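        # the wrapper exposes the noisy inference function inference_test_input_probs
        # so the attack objects below can query class probabilities from it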
        attack_tensor_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])

        # build each attack
        for atk in attacks:
            print('building attack {} tensors'.format(atk))
            # for each gpu assign to each attack
            if atk == 'ifgsm':
                ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                 sess=sess)
                attack_tensor_dict[atk] = ifgsm_obj.generate(
                    x=x,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / testing_step,
                    nb_iter=testing_step,
                    clip_min=-1.0,
                    clip_max=1.0)
            elif atk == 'mim':
                mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                  sess=sess)
                attack_tensor_dict[atk] = mim_obj.generate(
                    x=x,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / testing_step,
                    nb_iter=testing_step,
                    decay_factor=1.0,
                    clip_min=-1.0,
                    clip_max=1.0)
            elif atk == 'madry':
                madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                attack_tensor_dict[atk] = madry_obj.generate(
                    x=x,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / testing_step,
                    nb_iter=testing_step,
                    clip_min=-1.0,
                    clip_max=1.0)

        # Create a saver and load checkpoint
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=1000)
        saver.restore(sess, checkpoint_path)

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps

        step_for_epoch = parameter_dict[
            'step_for_epoch']  #number of steps for one epoch

        # load some fixed noise
        perturbH_test = parameter_dict['perturbH_test']

        perturbFM_h = parameter_dict['perturbFM_h']

        Noise = parameter_dict['Noise']

        Noise_test = parameter_dict['Noise_test']

        # test on testing dataset
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        test_batch_size = 5000
        n_draw = 1000
        begin_time = time.time()
        print('on testing set')
        print('test_batch_size: {}'.format(test_batch_size))
        print('testing iteration: {}'.format(testing_step))
        print('testing n_draw: {}'.format(n_draw))
        atk_index = -1
        for _ in [0, 1]:
            for atk in attacks:
                print(atk)
                if atk not in adv_acc_dict:
                    adv_acc_dict[atk] = -1
                    robust_adv_acc_dict[atk] = -1
                    robust_adv_utility_dict[atk] = -1
                # generate test samples
                test_batch = cifar10_data.test.next_batch(test_batch_size)
                adv_images = sess.run(attack_tensor_dict[atk],
                                      feed_dict={
                                          x: test_batch[0],
                                          adv_noise: Noise_test,
                                          mu_alpha: [fgsm_eps]
                                      })
                print("Done adversarial examples")
                ### PixelDP Robustness ###
                predictions_form_argmax = np.zeros([test_batch_size, 10])
                softmax_predictions = sess.run(softmax_y_conv,
                                               feed_dict={
                                                   x: adv_images,
                                                   noise: Noise,
                                                   FM_h: perturbFM_h
                                               })
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
                argmax_labels = np.argmax(test_batch[1], axis=1)
                print('labels')
                print(argmax_labels[0:100])
                print('init predictions')
                print(argmax_predictions[0:100])
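                # PixelDP-style Monte Carlo voting: redraw the Laplace noise n_draw times,
                # accumulate argmax votes, and refresh the softmax predictions with the new noise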
                for _n_draws in range(0, n_draw):
                    _BenignLNoise = generateIdLMNoise(image_size, Delta2,
                                                      epsilon2_update, L)
                    _perturbFM_h = np.random.laplace(
                        0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128)
                    _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128])
                    if _n_draws == 500 or _n_draws == n_draw - 1:
                        print("n_draws = {}".format(_n_draws))
                        print('time passed: {}s'.format(time.time() -
                                                        begin_time))
                    for j in range(test_batch_size):
                        pred = argmax_predictions[j]
                        predictions_form_argmax[j, pred] += 1
                    softmax_predictions = sess.run(
                        softmax_y_conv,
                        feed_dict={
                            x: adv_images,
                            noise: (_BenignLNoise / 10 + Noise),
                            FM_h: perturbFM_h
                        }) * sess.run(softmax_y_conv,
                                      feed_dict={
                                          x: adv_images,
                                          noise: Noise,
                                          FM_h:
                                          (_perturbFM_h / 10 + perturbFM_h)
                                      })
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                final_predictions = predictions_form_argmax
                print('final predictions')
                print(np.argmax(final_predictions, axis=1)[0:100])
                is_correct = []
                is_robust = []
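                # a test point counts as robust when its certified size (derived from the
                # vote counts and rescaled by dp_mult) is at least the attack budget fgsm_eps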
                for j in range(test_batch_size):
                    is_correct.append(
                        np.argmax(test_batch[1][j]) == np.argmax(
                            final_predictions[j]))
                    robustness_from_argmax = robustness.robustness_size_argmax(
                        counts=predictions_form_argmax[j],
                        eta=0.05,
                        dp_attack_size=fgsm_eps,
                        dp_epsilon=dp_epsilon,
                        dp_delta=0.05,
                        dp_mechanism='laplace') / dp_mult
                    is_robust.append(robustness_from_argmax >= fgsm_eps)
                adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_batch_size
                robust_adv_acc_dict[atk] = np.sum([
                    a and b for a, b in zip(is_robust, is_correct)
                ]) * 1.0 / np.sum(is_robust)
                robust_adv_utility_dict[atk] = np.sum(
                    is_robust) * 1.0 / test_batch_size
                ##############################
            log_str = 'testing, eps: {}; steps: {};'.format(
                fgsm_eps, testing_step)
            for atk in attacks:
                log_str += "\n{}: {:.4f} {:.4f} {:.4f} {:.4f} ".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
            print(log_str, flush=True)
    tf.reset_default_graph()
Example No. 14
def main(argv=None):

    keras.layers.core.K.set_learning_phase(1)
    manual_variable_initialization(True)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get toy samples
    # X_train, Y_train, X_test, Y_test = toysamples()
    # X_train, Y_train, X_test, Y_test = data_mnist_flat()
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Define input TF placeholder
    # x = tf.placeholder(tf.float32, shape=(None, 784))
    # y = tf.placeholder(tf.float32, shape=(None, 10))
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph ( NOT adv training)

    # model = cnn_model_keras(activation='elu')
    # model = dense_model_keras(activation='elu')
    predictions = cnn_model(x)

    # Attackers: WRM---FGSM---IFGM
    wrm = WassersteinRobustMethod(cnn_model, sess=sess)
    wrm_params = {'eps': 1.3, 'ord': 2, 'y': y, 'steps': 15}
    predictions_adv_wrm = cnn_model(wrm.generate(x, **wrm_params))

    fgsm = FastGradientMethod(cnn_model, sess=sess)
    fgsm_params = {'eps': 0.1, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    adv_fgsm = fgsm.generate(x, **fgsm_params)
    adv_fgsm = tf.stop_gradient(adv_fgsm)
    preds_adv_fgsm = cnn_model(adv_fgsm)

    ifgm = BasicIterativeMethod(cnn_model, sess=sess)
    ifgm_params = {
        'eps': 0.1,
        'ord': np.inf,
        'eps_iter': 0.02,
        'nb_iter': 10,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_ifgm = ifgm.generate(x, **ifgm_params)
    adv_ifgm = tf.stop_gradient(adv_ifgm)
    preds_adv_ifgm = cnn_model(adv_ifgm)

    pgm = MadryEtAl(cnn_model, sess=sess)
    pgm_params = {
        'eps': 0.1,
        'ord': np.inf,
        'eps_iter': 0.01,
        'nb_iter': 30,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_pgm = pgm.generate(x, **pgm_params)
    adv_pgm = tf.stop_gradient(adv_pgm)
    preds_adv_pgm = cnn_model(adv_pgm)

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: %0.4f' % accuracy)

        # Accuracy of the model on Wasserstein adversarial examples
        # accuracy_adv_wass = model_eval(sess, x, y, predictions_adv_wrm, X_test, \
        #                                Y_test, args=eval_params)
        # print('Test accuracy on Wasserstein examples: %0.4f' % accuracy_adv_wass)

        # Accuracy of the model on FGSM adversarial examples
        accuracy_adv_fgsm = model_eval(sess, x, y, preds_adv_fgsm, X_test, \
                                       Y_test, args=eval_params)
        print('Test accuracy on fgsm examples: %0.4f' % accuracy_adv_fgsm)

        # Accuracy of the model on IFGM adversarial examples
        accuracy_adv_ifgm = model_eval(sess, x, y, preds_adv_ifgm, X_test, \
                                       Y_test, args=eval_params)
        print('Test accuracy on ifgm examples: %0.4f' % accuracy_adv_ifgm)

        # Accuracy of the model on PGM adversarial examples
        # accuracy_adv_pgm = model_eval(sess, x, y, preds_adv_pgm, X_test, \
        #                                Y_test, args=eval_params)
        # print('Test accuracy on pgm examples: %0.4f\n' % accuracy_adv_pgm)

    # Train the model
    # model_train(sess, x, y, predictions, X_train, Y_train, evaluate=evaluate, \
    #             args=train_params, save=False)
    model_train(sess, x, y, predictions, X_train, Y_train, evaluate=evaluate,
                regulizer=True, regcons=0.5, model=cnn_model, lossregfunc=True,
                args=train_params, save=False)
def prep_bbox(sess,
              logits_scalar,
              x,
              y,
              X_train,
              Y_train,
              X_test,
              Y_test,
              img_rows,
              img_cols,
              channels,
              nb_epochs,
              batch_size,
              learning_rate,
              rng,
              phase=None,
              binary=False,
              scale=False,
              nb_filters=64,
              model_path=None,
              adv=0,
              delay=0,
              eps=0.3):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :return:
    """

    # Define TF model graph (for the black-box model)
    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            #from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            # model = make_scaled_binary_cnn(phase, 'bb_binsc_', input_shape=(
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
            model = make_scaled_binary_rand_cnn(phase,
                                                logits_scalar,
                                                'bb_binsc_',
                                                input_shape=(None, img_rows,
                                                             img_cols,
                                                             channels),
                                                nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bb_bin_',
                                          input_shape=(None, img_rows,
                                                       img_cols, channels),
                                          nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(phase,
                               logits_scalar,
                               'bb_fp_',
                               input_shape=(None, img_rows, img_cols,
                                            channels),
                               nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Print out the accuracy on legitimate data
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        print('Test accuracy of black-box on legitimate test '
              'examples: %.4f' % acc)
        return acc

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'bb train loss',
        'filename': 'bb_model',
        'train_scope': 'bb_model',
        'reuse_global_step': False,
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            nb_iter = 20
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        if adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            # the FGSM branch was missing its params dict; reuse the eps from the Madry setting
            train_attack_params = {'eps': MAX_EPS}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv = model.get_probs(adv_x_train)
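        # preds_adv is the model's output on attack examples regenerated every step;
        # it is passed as predictions_adv to model_train for the adversarial phase below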

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    phase=phase,
                    evaluate=evaluate,
                    args=train_params,
                    save=save,
                    rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        predictions_adv=preds_adv,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)

    accuracy = evaluate()

    return model, preds, accuracy, model_path
Example No. 16
def main(_):
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Images for inception classifier are normalized to be in [-1, 1] interval,
    num_classes = 1001
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]

    # Load ImageNet Class Labels
    with open('labels.json') as f:
        labels = json.load(f)

    # Prepare Graph
    with tf.Graph().as_default():

        # Build Model
        if FLAGS.model_arch.lower() == 'resnet_v2_101':
            model = models.Resnet_V2_101_Model(num_classes)
            exceptions = []

        elif FLAGS.model_arch.lower() == 'inception_v3':
            model = models.Inception_V3_Model(num_classes)
            exceptions = ['InceptionV3/AuxLogits.*']

        else:
            raise ValueError('Invalid model architecture specified: {}'.format(
                FLAGS.model_arch))

        # Define Model Variables
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
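        # run one dummy attack so the model graph (and its variables) is built
        # before slim.get_model_variables() is collected for the Saver below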
        FastGradientMethod(model).generate(x_input)
        model_variables = tf.contrib.framework.filter_variables(
            slim.get_model_variables(), exclude_patterns=exceptions)

        # Load Session
        saver = tf.train.Saver(model_variables)
        with tf.train.SessionManager().prepare_session(
                master=FLAGS.master,
                checkpoint_filename_with_path=FLAGS.checkpoint_path,
                saver=saver) as sess:

            # For Targeted Attacks
            target_idx = 0  # This will vary
            target = tf.constant(0, shape=[FLAGS.batch_size, num_classes])
            #      target = np.zeros((FLAGS.batch_size, num_classes), dtype=np.uint32)
            #      target[:, target] = 1

            # Build Attack
            if FLAGS.attack_type.lower() == 'fgsm':
                fgsm_opts = {
                    'eps': 0.3,
                    'clip_min': 0,
                    'clip_max': 1.,
                    'y_target': None
                }
                fgsm = FastGradientMethod(model)
                x_adv = fgsm.generate(x_input, **fgsm_opts)

            elif FLAGS.attack_type.lower() == 'bim':
                bim_opts = {
                    'eps': 0.3,
                    'clip_min': 0.,
                    'clip_max': 1.,
                    'y_target': None
                }
                bim = BasicIterativeMethod(model)
                x_adv = bim.generate(x_input, **bim_opts)

            elif FLAGS.attack_type.lower() == 'mim':
                mim_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                mim = MomentumIterativeMethod(model)
                x_adv = mim.generate(x_input, **mim_opts)

            elif FLAGS.attack_type.lower() == 'pgd':
                pgd_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                pgd = MadryEtAl(model)
                x_adv = pgd.generate(x_input, **pgd_opts)

            # Broken
            elif FLAGS.attack_type.lower() == 'jsma':
                jsma_opts = {
                    'theta': 1.,
                    'gamma': 0.1,
                    'clip_min': 0.,
                    'clip_max': 1.,
                    'y_target': None
                }
                jsma = SaliencyMapMethod(model)
                x_adv = jsma.generate(x_input, **jsma_opts)

            elif FLAGS.attack_type.lower() == 'lbfgs':
                lbfgs_opts = {'y_target': target}
                lbfgs = LBFGS(model)
                x_adv = lbfgs.generate(x_input, **lbfgs_opts)

            else:
                raise ValueError('Invalid attack type specified: {}'.format(
                    FLAGS.attack_type))

            start_time, batch_time, num_processed = time.time(), time.time(), 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)

                if FLAGS.show_predictions:
                    preds = sess.run(model(np.float32(images)))
                    probs = np.amax(preds, axis=1)
                    classes = np.argmax(preds, axis=1)
                    adv_preds = sess.run(model(adv_images))
                    adv_probs = np.amax(adv_preds, axis=1)
                    adv_classes = np.argmax(adv_preds, axis=1)

                    for i, _ in enumerate(filenames):
                        print('\nOriginal: {:.2f}% ({})\nAdversarial: {:.2f}% ({})'.format( \
                          probs[i]*100, labels[str(classes[i])], adv_probs[i]*100, labels[str(adv_classes[i])]))

                time_delta = time.time() - batch_time
                batch_time = time.time()
                num_processed += len(filenames)
                print('[SPEED ESTIMATION] BatchRate={:.4f} Hz; AverageRate={:.4f} Hz'.format( \
                  (len(filenames) / time_delta * 1.0), ((num_processed * 1.0) / (batch_time - start_time))))
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile,
          parameter_dict):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    """Train CIFAR-10 for a number of steps."""
    # make sure variables are placed on cpu
    # TODO: for AWS version, check if put variables on GPU will be better
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, trainable=False)
        attacks = ['ifgsm', 'mim', 'madry']

        # manually create all scopes
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope:
            scope_conv1 = scope
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope:
            scope_conv2 = scope
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope:
            scope_conv3 = scope
        with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope:
            scope_local4 = scope
        with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope:
            scope_local5 = scope

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        # with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])):
        with tf.variable_scope(scope_conv1) as scope:
            kernel1 = _variable_with_weight_decay(
                'kernel1',
                shape=[4, 4, 3, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[AECODER_VARIABLES])
            biases1 = _bias_on_cpu('biases1', [128],
                                   tf.constant_initializer(0.0),
                                   collect=[AECODER_VARIABLES])

        # estimate the sensitivity of the first layer from the largest singular value of kernel1
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)

        with tf.variable_scope(scope_conv2) as scope:
            kernel2 = _variable_with_weight_decay(
                'kernel2',
                shape=[5, 5, 128, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases2 = _bias_on_cpu('biases2', [128],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_conv3) as scope:
            kernel3 = _variable_with_weight_decay(
                'kernel3',
                shape=[5, 5, 256, 256],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases3 = _bias_on_cpu('biases3', [256],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local4) as scope:
            kernel4 = _variable_with_weight_decay(
                'kernel4',
                shape=[int(image_size / 4)**2 * 256, hk],
                stddev=0.04,
                wd=0.004,
                collect=[CONV_VARIABLES])
            biases4 = _bias_on_cpu('biases4', [hk],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local5) as scope:
            kernel5 = _variable_with_weight_decay(
                'kernel5', [hk, 10],
                stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
                math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases5 = _bias_on_cpu('biases5', [10],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        # group these for use as parameters
        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        scopes = [
            scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5
        ]

        # placeholders for input values
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])  # one time
        noise = tf.placeholder(tf.float32,
                               [None, image_size, image_size, 3])  # one time
        adv_noise = tf.placeholder(
            tf.float32, [None, image_size, image_size, 3])  # one time

        x_sb = tf.placeholder(tf.float32,
                              [None, image_size, image_size, 3])  # super-batch of benign images
        x_list = tf.split(x_sb, N_GPUS, axis=0)  # split into per-GPU batches
        adv_x_sb = tf.placeholder(tf.float32,
                                  [None, image_size, image_size, 3])  # super-batch of adversarial images
        adv_x_list = tf.split(adv_x_sb, N_GPUS, axis=0)

        x_test = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        y_sb = tf.placeholder(tf.float32,
                              [None, 10])  # super-batch of one-hot labels
        y_list = tf.split(y_sb, N_GPUS, axis=0)  # split into per-GPU batches
        adv_y_sb = tf.placeholder(tf.float32,
                                  [None, 10])  # super-batch of adversarial labels
        # adv_y_list = tf.split(adv_y_sb, N_GPUS, axis=0) # split it into each batch

        y_test = tf.placeholder(tf.float32, [None, 10])

        # re-arrange the input samples
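        # adversarial labels arrive grouped per attack (N_GPUS chunks each); rebuild one
        # label batch per tower containing its chunk from every attack, matching the order
        # in which adv_concat_list concatenates the attack tensors further below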
        _split_adv_y_sb = tf.split(adv_y_sb, N_AUX_GPUS, axis=0)
        reorder_adv_y_sb = []
        for i in range(N_GPUS):
            reorder_adv_y_sb.append(
                tf.concat([
                    _split_adv_y_sb[i + N_GPUS * atk_index]
                    for atk_index in range(len(attacks))
                ],
                          axis=0))

        tower_pretrain_grads = []
        tower_train_grads = []
        all_train_loss = []

        pretrain_opt = tf.train.AdamOptimizer(learning_rate)
        train_opt = tf.train.GradientDescentOptimizer(learning_rate)

        # batch index
        bi = 0
        for gpu in GPU_IDX:
            # putting ops on each tower (GPU)
            with tf.device('/gpu:{}'.format(gpu)):
                print('Train inference GPU placement')
                print('/gpu:{}'.format(gpu))
                # Auto-Encoder #
                # pretrain_adv and pretrain_benign are cost tensor of the encoding layer
                with tf.variable_scope(scope_conv1) as scope:
                    Enc_Layer2 = EncLayer(inpt=adv_x_list[bi],
                                          n_filter_in=3,
                                          n_filter_out=128,
                                          filter_size=3,
                                          W=kernel1,
                                          b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_adv = Enc_Layer2.get_train_ops2(
                        xShape=tf.shape(adv_x_list[bi])[0],
                        Delta=Delta2,
                        epsilon=epsilon2,
                        batch_size=L,
                        learning_rate=learning_rate,
                        W=kernel1,
                        b=biases1,
                        perturbFMx=adv_noise,
                        perturbFM_h=FM_h,
                        bn_index=bi)
                    Enc_Layer3 = EncLayer(inpt=x_list[bi],
                                          n_filter_in=3,
                                          n_filter_out=128,
                                          filter_size=3,
                                          W=kernel1,
                                          b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_benign = Enc_Layer3.get_train_ops2(
                        xShape=tf.shape(x_list[bi])[0],
                        Delta=Delta2,
                        epsilon=epsilon2,
                        batch_size=L,
                        learning_rate=learning_rate,
                        W=kernel1,
                        b=biases1,
                        perturbFMx=noise,
                        perturbFM_h=FM_h,
                        bn_index=bi)
                    pretrain_cost = pretrain_adv + pretrain_benign
                # this cost is not used
                # cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost)/2.0);

                # benign conv output
                x_image = x_list[bi] + noise
                y_conv = inference(x_image,
                                   FM_h,
                                   params,
                                   scopes,
                                   training=True,
                                   bn_index=bi)
                # softmax_y_conv = tf.nn.softmax(y_conv)

                # adv conv output
                adv_x_image = adv_x_list[bi] + adv_noise
                y_adv_conv = inference(adv_x_image,
                                       FM_h,
                                       params,
                                       scopes,
                                       training=True,
                                       bn_index=bi)

                # Calculate loss. Apply Taylor Expansion for the output layer
                perturbW = perturbFM * params[8]
                train_loss = cifar10.TaylorExp(y_conv, y_list[bi], y_adv_conv,
                                               reorder_adv_y_sb[bi], L, alpha,
                                               perturbW)
                all_train_loss.append(train_loss)

                # list of variables to train
                pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
                train_var_list = tf.get_collection(CONV_VARIABLES)

                # compute tower gradients
                pretrain_grads = pretrain_opt.compute_gradients(
                    pretrain_cost, var_list=pretrain_var_list)
                train_grads = train_opt.compute_gradients(
                    train_loss, var_list=train_var_list)
                # get_pretrain_grads(pretrain_cost, global_step, learning_rate, pretrain_var_list)
                # train_grads = get_train_grads(train_loss, global_step, learning_rate, train_var_list)

                # note this list contains grads and variables
                tower_pretrain_grads.append(pretrain_grads)
                tower_train_grads.append(train_grads)

                # batch index
                bi += 1

        # average the gradient from each tower
        pretrain_var_dict = {}
        all_pretrain_grads = {}
        avg_pretrain_grads = []
        for var in tf.get_collection(AECODER_VARIABLES):
            if var.name not in all_pretrain_grads:
                all_pretrain_grads[var.name] = []
                pretrain_var_dict[var.name] = var
        for tower in tower_pretrain_grads:
            for var_grad in tower:
                all_pretrain_grads[var_grad[1].name].append(var_grad[0])
        for var_name in all_pretrain_grads:
            # expand dim 0, then concat on dim 0, then reduce mean on dim 0
            expand_pretrain_grads = [
                tf.expand_dims(g, 0) for g in all_pretrain_grads[var_name]
            ]
            concat_pretrain_grads = tf.concat(expand_pretrain_grads, axis=0)
            reduce_pretrain_grads = tf.reduce_mean(concat_pretrain_grads, 0)
            # rebuild (grad, var) list
            avg_pretrain_grads.append(
                (reduce_pretrain_grads, pretrain_var_dict[var_name]))
        print('*****************************')
        print("avg_pretrain_grads:")
        for avg_pretrain_grad in avg_pretrain_grads:
            print('grads')
            print((avg_pretrain_grad[0].name, avg_pretrain_grad[0].shape))
            print('var')
            print((avg_pretrain_grad[1].name, avg_pretrain_grad[1].shape))
            print('------')

        train_var_dict = {}
        all_train_grads = {}
        avg_train_grads = []
        for var in tf.get_collection(CONV_VARIABLES):
            if var.name not in all_train_grads:
                all_train_grads[var.name] = []
                train_var_dict[var.name] = var
        for tower in tower_train_grads:
            for var_grad in tower:
                all_train_grads[var_grad[1].name].append(var_grad[0])
        for var_name in all_train_grads:
            # expand dim 0, then concat on dim 0, then reduce mean on dim 0
            expand_train_grads = [
                tf.expand_dims(g, 0) for g in all_train_grads[var_name]
            ]
            concat_train_grads = tf.concat(expand_train_grads, axis=0)
            reduce_train_grads = tf.reduce_mean(concat_train_grads, 0)
            # rebuild (grad, var) list
            avg_train_grads.append(
                (reduce_train_grads, train_var_dict[var_name]))
        print('*****************************')
        print("avg_train_grads:")
        for avg_train_grad in avg_train_grads:
            print('grads')
            print((avg_train_grad[0].name, avg_train_grad[0].shape))
            print('var')
            print((avg_train_grad[1].name, avg_train_grad[1].shape))
            print('------')
        print('*****************************')

        # get averaged loss tensor
        avg_loss = tf.reduce_mean(tf.stack(all_train_loss), axis=0)

        # TODO: take the average of the bn variables from each tower/training GPU
        # currently, testing is using the bn variables on bn_index 0 (tower/training GPU 0)

        # build train op (apply average gradient to variables)
        # according to 1.13 doc, updates need to be manually applied
        _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print('update ops:')
        print(_update_ops)

        with tf.control_dependencies(_update_ops):
            pretrain_op = pretrain_opt.apply_gradients(avg_pretrain_grads,
                                                       global_step=global_step)
            train_op = train_opt.apply_gradients(avg_train_grads,
                                                 global_step=global_step)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # init kernel 1 and get some values from it
        sess.run(kernel1.initializer)
        dp_epsilon = 0.005
        parameter_dict['dp_epsilon'] = dp_epsilon
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
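        # rescale epsilon2: the per-mechanism budget epsilon2_update is chosen so that
        # epsilon2_update * (1 + 1/_gamma + 1/_gamma_x) == epsilon2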
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        parameter_dict['epsilon2_update'] = epsilon2_update
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        # NOTE: these values needs to be calculated in testing
        delta_r = fgsm_eps * (image_size**2)
        parameter_dict['delta_r'] = delta_r
        _sensitivityW = sess.run(sensitivity)
        parameter_dict['_sensitivityW'] = _sensitivityW
        delta_h = _sensitivityW * (14**2)
        parameter_dict['delta_h'] = delta_h
        #dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
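        # dp_mult = (Laplace noise scale Delta2/(L*epsilon2_update)) / (combined sensitivity delta_h/2 + delta_r);
        # test() divides the certified robustness size by this multiplier before comparing it to fgsm_eps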
        dp_mult = (Delta2) / (L * epsilon2_update * (delta_h / 2 + delta_r))
        parameter_dict['dp_mult'] = dp_mult

        # place test-time inference into CPU
        with tf.device('/cpu:0'):
            # testing pipeline
            test_x_image = x_test + noise
            test_y_conv = inference(test_x_image,
                                    FM_h,
                                    params,
                                    scopes,
                                    training=True,
                                    bn_index=0)
            test_softmax_y_conv = tf.nn.softmax(test_y_conv)

        # ============== attacks ================
        iter_step_training = 3
        parameter_dict['iter_step_training'] = iter_step_training
        # iter_step_testing = 1000
        aux_dup_count = N_GPUS
        # split input x_super_batch into N_AUX_GPUS parts
        x_attacks = tf.split(x_sb, N_AUX_GPUS, axis=0)
        # split input x_test into aux_dup_count parts
        x_test_split = tf.split(x_test, aux_dup_count, axis=0)

        # setup all attacks
        # attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            scopes=scopes,
            image_size=image_size,
            adv_noise=adv_noise)
        attack_tensor_training_dict = {}
        attack_tensor_testing_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])

        # build each attack
        for atk_idx in range(len(attacks)):
            atk = attacks[atk_idx]
            print('building attack {} tensors'.format(atk))
            # for each gpu assign to each attack
            attack_tensor_training_dict[atk] = []
            attack_tensor_testing_dict[atk] = []
            for i in range(aux_dup_count):
                if atk == 'ifgsm':
                    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[i])):
                        print('ifgsm GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i]))
                        # ifgsm tensors for training
                        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                         sess=sess)
                        attack_tensor_training_dict[atk].append(
                            ifgsm_obj.generate(x=x_attacks[i],
                                               eps=mu_alpha,
                                               eps_iter=mu_alpha /
                                               iter_step_training,
                                               nb_iter=iter_step_training,
                                               clip_min=-1.0,
                                               clip_max=1.0))

                elif atk == 'mim':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count])):
                        print('mim GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count]))
                        # mim tensors for training
                        mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                          sess=sess)
                        attack_tensor_training_dict[atk].append(
                            mim_obj.generate(
                                x=x_attacks[i + 1 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                decay_factor=1.0,
                                clip_min=-1.0,
                                clip_max=1.0))

                elif atk == 'madry':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count])):
                        print('madry GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count]))
                        # madry tensors for training
                        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                        attack_tensor_training_dict[atk].append(
                            madry_obj.generate(
                                x=x_attacks[i + 2 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                clip_min=-1.0,
                                clip_max=1.0))

        # combine all attack tensors
        adv_concat_list = []
        for i in range(aux_dup_count):
            adv_concat_list.append(
                tf.concat(
                    [attack_tensor_training_dict[atk][i] for atk in attacks],
                    axis=0))
        # the tensor that contains each batch of adv samples for training
        # has same sample order as the labels
        adv_super_batch_tensor = tf.concat(adv_concat_list, axis=0)

        #====================== attack =========================

        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        print('******************** debug info **********************')
        # list of variables to train
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        print('pretrain var list')
        for v in pretrain_var_list:
            print((v.name, v.shape))
        print('**********************************')
        train_var_list = tf.get_collection(CONV_VARIABLES)
        print('train var list')
        for v in train_var_list:
            print((v.name, v.shape))
        print('**********************************')

        # all variables
        print('all variables')
        vl = tf.global_variables()
        for v in vl:
            print((v.name, v.shape))
        print('**********************************')

        # all ops
        ops = [n.name for n in tf.get_default_graph().as_graph_def().node]
        print('total number of ops')
        print(len(ops))
        # for op in ops:
        #   print(op)
        print('******************** debug info **********************')
        # exit()

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=1000)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        sess.run(init)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps
        print('total number of steps: {}'.format(T))
        step_for_epoch = int(math.ceil(D / L))
        #number of steps for one epoch
        parameter_dict['step_for_epoch'] = step_for_epoch
        print('step_for_epoch: {}'.format(step_for_epoch))

        # generate some fixed noise
        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)  # one time
        perturbH_test = np.reshape(perturbH_test,
                                   [-1, 14, 14, 128])  # one time
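        # scale 0 makes perturbH_test all zeros, i.e. no extra feature-map noise in this fixed test perturbation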
        parameter_dict['perturbH_test'] = perturbH_test
        print('perturbH_test')
        print(perturbH_test.shape)

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)  # one time
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])  # one time
        parameter_dict['perturbFM_h'] = perturbFM_h
        print('perturbFM_h')
        print(perturbFM_h.shape)

        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update,
                                  L)  # one time
        parameter_dict['Noise'] = Noise
        Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update,
                                       L)  # one time
        parameter_dict['Noise_test'] = Noise_test
        print('Noise and Noise_test')
        print(Noise.shape)
        print(Noise_test.shape)
        # exit()

        # some timing variables
        adv_duration_total = 0.0
        adv_duration_count = 0
        train_duration_total = 0.0
        train_duration_count = 0

        # some debug flag
        adv_batch_flag = True
        batch_flag = True
        L_flag = True
        parameter_flag = True

        _global_step = 0
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            # TODO: fix this
            d_eps = random.random() * 0.5
            # d_eps = 0.25
            print('d_eps: {}'.format(d_eps))

            # version with 3 AUX GPU
            # get two super batchs, one for benign training, one for adv training
            super_batch_images, super_batch_labels = cifar10_data.train.next_super_batch(
                N_GPUS, random=True)
            super_batch_images_for_adv, super_batch_adv_labels = cifar10_data.train.next_super_batch_premix_ensemble(
                N_GPUS, random=True)

            # TODO: re-arrange the adv labels to match the adv samples

            # run adv_tensors_batch_concat to generate adv samples
            super_batch_adv_images = sess.run(adv_super_batch_tensor,
                                              feed_dict={
                                                  x_sb:
                                                  super_batch_images_for_adv,
                                                  adv_noise: Noise,
                                                  mu_alpha: [d_eps]
                                              })

            adv_finish_time = time.time()
            adv_duration = adv_finish_time - start_time
            adv_duration_total += adv_duration
            adv_duration_count += 1

            if adv_batch_flag:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                print(super_batch_adv_images.shape)
                print(super_batch_adv_labels.shape)
                adv_batch_flag = False

            if batch_flag:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                batch_flag = False

            if L_flag:
                print("L: {}".format(L))
                L_flag = False

            if parameter_flag:
                print('*=*=*=*=*')
                print(parameter_dict)
                print('*=*=*=*=*', flush=True)
                logfile.write('*=*=*=*=*\n')
                logfile.write(str(parameter_dict))
                logfile.write('*=*=*=*=*\n')
                parameter_flag = False

            _, _, avg_loss_value = sess.run(
                [pretrain_op, train_op, avg_loss],
                feed_dict={
                    x_sb: super_batch_images,
                    y_sb: super_batch_labels,
                    adv_x_sb: super_batch_adv_images,
                    adv_y_sb: super_batch_adv_labels,
                    noise: Noise,
                    adv_noise: Noise_test,
                    FM_h: perturbFM_h
                })

            assert not np.isnan(
                avg_loss_value), 'Model diverged with loss = NaN'

            train_finish_time = time.time()
            train_duration = train_finish_time - adv_finish_time
            train_duration_total += train_duration
            train_duration_count += 1

            # save model every 50 epochs
            if step % (50 * step_for_epoch) == 0 and (step >=
                                                      50 * step_for_epoch):
                print('saving model')
                checkpoint_path = os.path.join(os.getcwd() + dirCheckpoint,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            # Save the model checkpoint periodically.
            # if step % (10*step_for_epoch) == 0 and (step > _global_step):
            if step % 10 == 0 and (step > _global_step):
                # print n steps and time
                print("current epoch: {:.2f}".format(step / step_for_epoch))
                num_examples_per_step = L * N_GPUS * 2
                avg_adv_duration = adv_duration_total / adv_duration_count
                avg_train_duration = train_duration_total / train_duration_count
                avg_total_duration = avg_adv_duration + avg_train_duration
                examples_per_sec = num_examples_per_step / avg_total_duration
                sec_per_step = avg_total_duration
                # sec_per_batch = sec_per_step / (N_GPUS * 2)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.2f '
                    'sec/step; %.2f sec/adv_gen_op; %.2f sec/train_op)')
                actual_str = format_str % (
                    datetime.now(), step, avg_loss_value, examples_per_sec,
                    sec_per_step, avg_adv_duration, avg_train_duration)
                print(actual_str, flush=True)
                logfile.write(actual_str + '\n')

        #from cleverhans.attacks import FastGradientMethod
        #attacker = FastGradientMethod(model, back='tf', sess=sess)

        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)

        max_eps = 16
        epsilons = np.linspace(1, max_eps, max_eps)
        #epsilons = np.linspace(0, max_eps, max_eps // 4, endpoint=False)
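        # np.linspace(1, max_eps, max_eps) sweeps integer eps values 1..16;
        # given the 0..255 clip range suggested by the commented attack_params
        # below, these eps values appear to be on the raw pixel scale.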
        eval_par = {'batch_size': eval_batch_size}
        for e in epsilons:
            start_time = time.time()
            attack_params.update({'eps': e})
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_test[:nb_samples],
                             Y_test[:nb_samples],
                             args=eval_par)
            print('%.2f, %.4f, %f' % (e, acc, time.time() - start_time))
        '''
        # attacker can be any CleverHans attack here, but attacks that
        # create internal variables have not yet been tested.
        attack_params = {'batch_size': eval_batch_size,
                         'clip_min': 0., 'clip_max': 255.}
        '''
Example 19
def test():
    """
    """
    tf.reset_default_graph()
    g = tf.get_default_graph()

    with g.as_default():
        # Placeholder nodes.
        images_holder = tf.placeholder(
            tf.float32,
            [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
        label_holder = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        is_training = tf.placeholder(tf.bool, ())

        # model
        model = model_cifar100.RDPCNN(images_holder, label_holder,
                                      FLAGS.INPUT_SIGMA,
                                      is_training)  # for adv examples

        model_loss = model.loss()
        model_acc = model.cnn_accuracy

        # robust
        def inference(x):
            logits, _ = model.cnn.prediction(x)
            return logits

        def inference_prob(x):
            _, probs = model.cnn.prediction(x)
            return probs

        graph_dict = {}
        graph_dict["images_holder"] = images_holder
        graph_dict["label_holder"] = label_holder
        graph_dict["is_training"] = is_training

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config, graph=g) as sess:
        sess.run(tf.global_variables_initializer())
        # load model
        model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

        # adv test
        ####################################################################################################
        x_advs = {}
        ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                               output_layer='logits')
        ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                              output_layer='probs')
        # FastGradientMethod
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        x_advs["fgsm"] = fgsm_obj.generate(x=images_holder,
                                           eps=FLAGS.ATTACK_SIZE,
                                           clip_min=0.0,
                                           clip_max=1.0)  # testing now

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        x_advs["ifgsm"] = ifgsm_obj.generate(x=images_holder,
                                             eps=FLAGS.ATTACK_SIZE,
                                             eps_iter=FLAGS.ATTACK_SIZE / 10,
                                             nb_iter=10,
                                             clip_min=0.0,
                                             clip_max=1.0)

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        x_advs["mim"] = mim_obj.generate(x=images_holder,
                                         eps=FLAGS.ATTACK_SIZE,
                                         eps_iter=FLAGS.ATTACK_SIZE / 10,
                                         nb_iter=10,
                                         decay_factor=1.0,
                                         clip_min=0.0,
                                         clip_max=1.0)

        # MadryEtAl (Projected Gradient Descent with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        x_advs["madry"] = madry_obj.generate(x=images_holder,
                                             eps=FLAGS.ATTACK_SIZE,
                                             eps_iter=FLAGS.ATTACK_SIZE / 10,
                                             nb_iter=10,
                                             clip_min=0.0,
                                             clip_max=1.0)
        graph_dict["x_advs"] = x_advs
        ####################################################################################################

        # tensorboard writer
        #test_writer = model_utils.init_writer(FLAGS.TEST_LOG_PATH, g)
        print("\nTest")
        if FLAGS.local:
            total_test_batch = 2
        else:
            total_test_batch = None
        dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
        test_info(sess,
                  model,
                  True,
                  graph_dict,
                  dp_info,
                  FLAGS.TEST_LOG_FILENAME,
                  total_batch=total_test_batch)
        robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
Example 20
def PDP_resnet_with_pretrain_adv(TIN_data, resnet_params, train_params, params_to_save):
  # dict for encoding layer variables and output layer variables
  pre_define_vars = {}

  # list of variables to train
  train_vars = []
  pretrain_vars = []

  with tf.Graph().as_default(), tf.device('/cpu:0'):
    global_step = tf.Variable(0, trainable=False)
    
    # Parameters Declarification
    ######################################
    
    # encoding (pretrain) layer variables
    with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
      kernel1 = tf.get_variable('kernel1', shape=[train_params.enc_kernel_size, train_params.enc_kernel_size, 
                                3, train_params.enc_filters], dtype=tf.float32, 
                                initializer=tf.contrib.layers.xavier_initializer_conv2d())
      biases1 = tf.get_variable('biases1', shape=[train_params.enc_filters], dtype=tf.float32, 
                                initializer=tf.constant_initializer(0.0))
    pre_define_vars['kernel1'] = kernel1
    pre_define_vars['biases1'] = biases1 
    train_vars.append(kernel1)
    train_vars.append(biases1)
    pretrain_vars.append(kernel1)
    pretrain_vars.append(biases1)

    shape     = kernel1.get_shape().as_list()
    w_t       = tf.reshape(kernel1, [-1, shape[-1]])
    w         = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2*train_params.Delta2/(train_params.effective_batch_size * sensitivity)
    print('gamma: {}'.format(gamma))
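    # sing_vals holds the singular values of the flattened conv kernel, so
    # sensitivity = tf.reduce_max(sing_vals) is the kernel's spectral norm
    # (its largest singular value), which is what gamma above is derived from.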
    
    # output layer variables
    with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope:
      stdv = 1.0 / math.sqrt(train_params.hk)
      final_w = tf.get_variable('kernel', shape=[train_params.hk, train_params.num_classes], dtype=tf.float32, 
                                initializer=tf.random_uniform_initializer(-stdv, stdv))
      final_b = tf.get_variable('bias', shape=[train_params.num_classes], dtype=tf.float32, 
                                initializer=tf.constant_initializer(0.0))
    pre_define_vars['final_w'] = final_w
    pre_define_vars['final_b'] = final_b 
    train_vars.append(final_w)
    train_vars.append(final_b)
    ######################################
    
    # Build a Graph that computes the logits predictions from the inputs
    ######################################
    # input placeholders
    x_sb = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb') # input is a super batch (several batches concatenated)
    x_sb_adv = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb_adv')
    x_test = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_test')

    y_sb = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb') # labels for the super batch
    y_sb_adv = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb_adv')
    y_test = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_test')

    FM_h = tf.placeholder(tf.float32, [None, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters], name='FM_h') # one time
    noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='noise') # one time
    adv_noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='adv_noise') # one time

    learning_rate = tf.placeholder(tf.float32, shape=(), name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

    # list of grads for each GPU
    tower_pretrain_grads = []
    tower_train_grads = []
    all_train_loss = []

    # optimizers
    pretrain_opt = tf.train.AdamOptimizer(learning_rate)
    train_opt = tf.train.AdamOptimizer(learning_rate)

    # model and loss on one GPU
    with tf.device('/gpu:{}'.format(GPU_IDX[0])):
      # setup encoding layer training
      with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
        Enc_Layer2 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, 
                              W=kernel1, b=biases1, activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(x_sb_adv)[0], Delta=train_params.Delta2, 
                                                epsilon=train_params.epsilon2, batch_size=None, learning_rate=None,
                                                W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, 
                              W=kernel1, b=biases1, activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(xShape=tf.shape(x_sb)[0], Delta=train_params.Delta2, 
                                                    epsilon=train_params.epsilon2, batch_size=None, learning_rate=None,
                                                    W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h)
        pretrain_cost = tf.reduce_mean(pretrain_adv + pretrain_benign)
      print_var('pretrain_cost', pretrain_cost)
      
      # use standard loss first
      y_logits = inference(x_sb + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params)
      y_softmax = tf.nn.softmax(y_logits)

      y_logits_adv = inference(x_sb_adv + adv_noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params)
      y_softmax_adv = tf.nn.softmax(y_logits_adv)

      # taylor exp
      # TODO: use noise here
      perturbW = train_params.perturbFM * final_w
      # train_loss = TaylorExp_no_noise(y_softmax, y_sb, y_softmax_adv, y_sb_adv, 
      #                        train_params.effective_batch_size, train_params.alpha)
      train_loss = TaylorExp(y_softmax, y_sb, y_softmax_adv, y_sb_adv, 
                             train_params.effective_batch_size, train_params.alpha, perturbW)
      print_var('train_loss', train_loss)
      all_train_loss.append(train_loss)
    
    # split testing in each gpu
    x_sb_tests = tf.split(x_sb, N_ALL_GPUS, axis=0)
    y_softmax_test_list = []
    for gpu in range(N_ALL_GPUS):
      with tf.device('/gpu:{}'.format(gpu)):
        # testing graph now in each gpu
        y_logits_test = test_inference(x_sb_tests[gpu] + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params)
        y_softmax_test_list.append(tf.nn.softmax(y_logits_test))
    y_softmax_test_concat = tf.concat(y_softmax_test_list, axis=0)

    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
    all_vars = tf.global_variables()
    print_var_list('all vars', all_vars)
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

    # add selected vars into trainable variable list
    # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or
    for var in tf.global_variables():
      if 'resnet_model' in var.name and \
        ('conv0' in var.name or 
        'fc' in var.name or 
        'res3' in var.name or 
        'res4' in var.name or 
        'res1' in var.name or 
        'res2' in var.name) and \
          ('gamma' in var.name or 
            'beta' in var.name or 
            'kernel' in var.name or
            'bias' in var.name):
        if var not in train_vars:
          train_vars.append(var)
      elif 'enc_layer' in var.name and \
        ('kernel' in var.name or
          'bias' in var.name):
        if var not in pretrain_vars:
          pretrain_vars.append(var)
        if var not in train_vars:
          train_vars.append(var)
      elif 'enc_layer' in var.name and \
        ('gamma' in var.name or 
          'beta' in var.name):
        if var not in pretrain_vars:
          pretrain_vars.append(var)
    
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
    print_var_list('train_vars', train_vars)
    print_var_list('pretrain_vars', pretrain_vars)
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

    # op for compute grads on one gpu
    with tf.device('/gpu:{}'.format(GPU_IDX[0])):
      # get all update_ops (updates of moving average and std) for batch normalizations
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      print_op_list('update ops', update_ops)
      enc_update_ops = [op for op in update_ops if 'enc_layer' in op.name]
      print_op_list('enc layer update ops', enc_update_ops)

      # when the gradients are computed, update the batch_norm
      with tf.control_dependencies(enc_update_ops):
        pretrain_grads = pretrain_opt.compute_gradients(pretrain_cost, var_list=pretrain_vars)
        print('*********** pretrain_grads ***********')
        for x in pretrain_grads:
          print(x)
        print('**********************')
      with tf.control_dependencies(update_ops):
        train_grads = train_opt.compute_gradients(train_loss, var_list=train_vars)
        print('*********** train_grads ***********')
        for x in train_grads:
          print(x)
        print('**********************')
      avg_pretrain_grads = pretrain_grads
      avg_train_grads = train_grads
      
      # get averaged loss tensor for pretrain and train ops
      total_loss = tf.reduce_sum(tf.stack(all_train_loss))
      total_pretrain_loss = tf.reduce_mean(pretrain_cost)

    # prepare to save gradients for large batch
    pretrain_grads_save = [g for g,v in pretrain_grads]
    # print('*********** pretrain_grads_save ***********' + str(pretrain_grads_save) + '**********************')
    train_grads_save = [g for g,v in train_grads]
    # print('*********** train_grads_save ***********' + str(train_grads_save) + '**********************')
    pretrain_grads_shapes = [g.shape.as_list() for g in pretrain_grads_save]
    train_grads_shapes = [g.shape.as_list() for g in train_grads_save]

    # placeholders for importing saved gradients
    pretrain_grads_placeholders = []
    for g,v in pretrain_grads:
      pretrain_grads_placeholders.append(tf.placeholder(tf.float32, v.shape))

    train_grads_placeholders = []
    for g,v in train_grads:
      train_grads_placeholders.append(tf.placeholder(tf.float32, v.shape))

    # construct the (grad, var) list
    assemble_pretrain_grads = []
    for i in range(len(pretrain_vars)):
      assemble_pretrain_grads.append((pretrain_grads_placeholders[i], pretrain_vars[i]))
    
    assemble_train_grads = []
    for i in range(len(train_grads)):
      assemble_train_grads.append((train_grads_placeholders[i], train_vars[i]))
    
    # apply the saved gradients
    pretrain_op = pretrain_opt.apply_gradients(assemble_pretrain_grads, global_step=global_step)
    train_op = train_opt.apply_gradients(assemble_train_grads, global_step=global_step)
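    # The pattern above implements manual gradient accumulation: gradients are
    # fetched as numpy arrays (pretrain_grads_save / train_grads_save), summed
    # over train_params.batch_multi small batches in the training loops below,
    # averaged, and then fed back through the placeholders so apply_gradients
    # performs a single update with an effectively larger batch size.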
    ######################################

    # Create a saver.
    saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000)
    
    # start a session with memory growth
    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth=True
    sess = tf.Session(config=config)
    print("session created")

    # get some initial values
    sess.run(kernel1.initializer)
    _gamma = sess.run(gamma)
    _gamma_x = train_params.Delta2 / train_params.effective_batch_size
    epsilon2_update = train_params.epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
    delta_r = train_params.fgsm_eps * (train_params.image_size ** 2)
    _sensitivityW = sess.run(sensitivity)
    delta_h = _sensitivityW*(train_params.enc_h_size ** 2)
    #dp_mult = (train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update)) / (delta_r / train_params.dp_epsilon) + \
    #  (2 * train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update))/(delta_h / train_params.dp_epsilon)
    dp_mult = (train_params.Delta2*train_params.dp_epsilon) / (train_params.effective_batch_size*epsilon2_update * (delta_h / 2 + delta_r))
    # save some values for testing
    params_to_save['epsilon2_update'] = epsilon2_update
    params_to_save['dp_mult'] = dp_mult

    #######################################
    # ADV attacks
    #######################################

    # split input for attacks
    x_attacks = tf.split(x_sb, 3, axis=0) # split it into each batch
    
    # currently only ifgsm, mim, and madry attacks are available
    attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
    
    # wrap the inference
    ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_output_probs, output_layer='probs', 
                                                adv_noise=adv_noise, keep_prob=keep_prob, pre_define_vars=pre_define_vars, 
                                                resnet_params=resnet_params, train_params=train_params)
    
    # to save the reference to the attack tensors
    attack_tensor_training_dict = {}
    attack_tensor_testing_dict = {}

    # placeholder for eps parameter
    mu_alpha = tf.placeholder(tf.float32, [1])
      
    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # place on specific GPU
    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])):
      print('ifgsm GPU placement')
      print('/gpu:{}'.format(AUX_GPU_IDX[0]))
      if attack_switch['ifgsm']:
          print('creating attack tensor of BasicIterativeMethod')
          ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
          attack_tensor_training_dict['ifgsm'] = ifgsm_obj.generate(x=x_attacks[0], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, clip_min=-1.0, clip_max=1.0)
          attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0)

    # MomentumIterativeMethod
    # place on specific GPU
    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[1])):
      print('mim GPU placement')
      print('/gpu:{}'.format(AUX_GPU_IDX[1]))
      if attack_switch['mim']:
          print('creating attack tensor of MomentumIterativeMethod')
          mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
          attack_tensor_training_dict['mim'] = mim_obj.generate(x=x_attacks[1], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
          attack_tensor_testing_dict['mim'] = mim_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
      
    # MadryEtAl (Projected Gradient Descent with random init, same as rand+fgsm)
    # place on specific GPU
    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[2])):
      print('madry GPU placement')
      print('/gpu:{}'.format(AUX_GPU_IDX[2]))
      if attack_switch['madry']:
          print('creating attack tensor of MadryEtAl')
          madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
          attack_tensor_training_dict['madry'] = madry_obj.generate(x=x_attacks[2], eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_training, nb_iter=train_params.iter_step_training, clip_min=-1.0, clip_max=1.0)
          attack_tensor_testing_dict['madry'] = madry_obj.generate(x=x_sb, eps=mu_alpha, eps_iter=mu_alpha/train_params.iter_step_testing, nb_iter=train_params.iter_step_testing, clip_min=-1.0, clip_max=1.0)

    # combine the tensors
    adv_tensors_concat = tf.concat([attack_tensor_training_dict[x] for x in train_params.attacks], axis=0)
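    # concatenating the per-attack training tensors means a single sess.run of
    # adv_tensors_concat returns one super batch whose parts come from the
    # attacks listed in train_params.attacks (each attack was built above on
    # its own slice of the tf.split of x_sb), assuming that list matches the
    # ifgsm/mim/madry ordering used for x_attacks[0..2].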
    #######################################

    # init op
    print('initialize_all_variables')
    init = tf.initialize_all_variables()
    sess.run(init)

    # load pretrained variables of RESNET
    if train_params.load_weights:
      # first we need to load the variable-name conversion table
      tgt_var_name_dict = {}
      with open(train_params.weight_table_path, 'r', encoding='utf-8') as inf:
        lines = inf.readlines()
        for line in lines:
          var_names = line.strip().split(' ')
          if var_names[1] == 'NONE':
            continue
          else:
            tgt_var_name_dict[var_names[0]] = var_names[1]

      # load variables dict from checkpoint
      pretrained_var_dict = load_pretrained_vars()

      # load pre-trained vars using name convert table
      for var in tf.global_variables():
        if var.name in tgt_var_name_dict:
          # print('var \"{}\" found'.format(var.name))
          try:
            var.load(pretrained_var_dict[tgt_var_name_dict[var.name]], session=sess)
            print('{} loaded'.format(var.name))
          except:
            print('var {} not loaded since shape changed'.format(var.name))
        else:
          if 'Adam' not in var.name:
            print('var \"{}\" NOT FOUND'.format(var.name))
    else:
      print('Training model from scratch')


    #####################################
    # init noise and save for testing
    perturbH_test = np.random.laplace(0.0, 0, train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters)
    perturbH_test = np.reshape(perturbH_test, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters])
    params_to_save['perturbH_test'] = perturbH_test
    
    perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), 
                                        train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters)
    perturbFM_h = np.reshape(perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters])
    params_to_save['perturbFM_h'] = perturbFM_h

    Noise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size)
    params_to_save['Noise'] = Noise

    Noise_test = generateIdLMNoise(train_params.image_size, 0, epsilon2_update, train_params.effective_batch_size)
    params_to_save['Noise_test'] = Noise_test

    # save params for testing
    with open(os.getcwd() + train_params.params_save_path, 'wb') as outf:
      pickle.dump(params_to_save, outf)
      print('params saved')

    ####################################
    print('start pretrain')
    start_time = time.time()
    lr_schedule_list = sorted(train_params.lr_schedule_pretrain.keys())
    attacks_and_benign = train_params.attacks + ['benign']
    # build zeros numpy arrays for accumulate grads
    accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
    total_pretrain_loss_value = 0.0
    step = 0
    # pretrain loop
    while True:
      # if enough steps, break
      if step > train_params.pretrain_steps:
        break
      # increment the step counter here so it is not forgotten
      else:
        step += 1

      # manual schedule learning rate
      current_epoch = step // (train_params.epoch_steps)
      current_lr = train_params.lr_schedule_pretrain[get_lr(current_epoch, lr_schedule_list)]

      # benign and adv batch
      super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)
      adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)

      # get pretrain grads
      pretrain_grads_save_np, _pretain_loss_value = sess.run([pretrain_grads_save, total_pretrain_loss], feed_dict={x_sb: super_batch[0], 
                                                                                                                    x_sb_adv: adv_super_batch[0], 
                                                                                                                    learning_rate: current_lr,
                                                                                                                    adv_noise: Noise_test, 
                                                                                                                    noise: Noise, 
                                                                                                                    FM_h: perturbFM_h})
      # accumulate grads
      for i in range(len(accumu_pretrain_grads)):
        accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i]
      
      # accumulate loss values
      total_pretrain_loss_value = total_pretrain_loss_value + _pretain_loss_value

      # use accumulated gradients to update variables
      if step % train_params.batch_multi == 0 and step > 0:
        # print('effective batch reached at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps))
        # compute the average grads and build the feed dict
        pretrain_feed_dict = {}
        for i in range(len(accumu_pretrain_grads)):
          pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi
        pretrain_feed_dict[learning_rate] = current_lr

        # run train ops by feeding the gradients
        sess.run(pretrain_op, feed_dict=pretrain_feed_dict)

        # get loss value
        avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi

        # reset the average grads
        accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
        total_pretrain_loss_value = 0.0

      # print loss
      if step % (1*train_params.epoch_steps) == 0 and step >= (1*train_params.epoch_steps):
        print('pretrain report at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps))
        dt = time.time() - start_time
        avg_epoch_time = dt / (step / train_params.epoch_steps)
        print('epoch: {:.4f}, avg epoch time: {:.4f}, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True)
        print('pretrain_loss: {:.6f}'.format(avg_pretrain_loss_value))

    ####################################
    print('start train')
    start_time = time.time()
    lr_schedule_list = sorted(train_params.lr_schedule.keys())
    # train whole model
    # build zeros numpy arrays for accumulate grads
    accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
    accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes]
    total_pretrain_loss_value = 0.0
    total_train_loss_value = 0.0
    step = 0
    # train loop
    while True:
      # if enough steps, break
      if step > train_params.train_steps:
        break
      # increment the step counter here so it is not forgotten
      else:
        step += 1

      # compute the grads every step
      # random eps value for training
      d_eps = random.random()*train_params.random_eps_range

      # manual schedule learning rate
      current_epoch = step // (train_params.epoch_steps)
      current_lr = train_params.lr_schedule[get_lr(current_epoch, lr_schedule_list)]
      
      # benign and adv batch
      super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)
      adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)

      # create adv samples
      super_batch_adv_images = sess.run(adv_tensors_concat, 
                                        feed_dict={x_sb:adv_super_batch[0], keep_prob:1.0,
                                                    adv_noise: Noise, mu_alpha:[d_eps]})   

      # get pretrain and train grads
      pretrain_grads_save_np, _pretain_loss_value = sess.run([pretrain_grads_save, total_pretrain_loss], feed_dict={x_sb: super_batch[0], 
                                                                                                                    x_sb_adv: super_batch_adv_images, 
                                                                                                                    learning_rate: current_lr,
                                                                                                                    adv_noise: Noise_test, 
                                                                                                                    noise: Noise, 
                                                                                                                    FM_h: perturbFM_h})
      train_grads_save_np, _train_loss_value = sess.run([train_grads_save, total_loss], feed_dict = {x_sb: super_batch[0], y_sb: super_batch[1],
                                                                  x_sb_adv: super_batch_adv_images, y_sb_adv: adv_super_batch[1],
                                                                  keep_prob: train_params.keep_prob, learning_rate: current_lr,
                                                                  noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h})

      # accumulate grads
      for i in range(len(accumu_pretrain_grads)):
        accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i]

      for i in range(len(accumu_train_grads)):
        accumu_train_grads[i] = accumu_train_grads[i] + train_grads_save_np[i]

      # accumulate loss values
      total_pretrain_loss_value = total_pretrain_loss_value + _pretain_loss_value
      total_train_loss_value = total_train_loss_value + _train_loss_value
      
      # use accumulated gradients to update variables
      if step % train_params.batch_multi == 0 and step > 0:
        # compute the average grads and build the feed dict
        pretrain_feed_dict = {}
        for i in range(len(accumu_pretrain_grads)):
          pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi
        pretrain_feed_dict[learning_rate] = current_lr
        # pretrain_feed_dict[keep_prob] = 0.5

        train_feed_dict = {}
        for i in range(len(accumu_train_grads)):
          train_feed_dict[train_grads_placeholders[i]] = accumu_train_grads[i] / train_params.batch_multi
        train_feed_dict[learning_rate] = current_lr
        # train_feed_dict[keep_prob] = 0.5

        # run train ops
        sess.run(pretrain_op, feed_dict=pretrain_feed_dict)
        sess.run(train_op, feed_dict=train_feed_dict)

        # get loss value
        avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi
        avg_train_loss_value = total_train_loss_value / train_params.batch_multi

        # reset the average grads
        accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
        accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes]
        total_pretrain_loss_value = 0.0
        total_train_loss_value = 0.0

      # print status every epoch
      if step % int(train_params.epoch_steps) == 0:
        dt = time.time() - start_time
        avg_epoch_time = dt / (step / train_params.epoch_steps)
        print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True)

      # save model
      if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_save:
        print('saving model at epoch {}'.format(step / train_params.epoch_steps))
        checkpoint_path = os.path.join(os.getcwd() + train_params.check_point_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
        
      # testing during training
      if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_test:
        test_start = time.time()
        print('train test reported at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps))
        dt = time.time() - start_time
        avg_epoch_time = dt / (step / train_params.epoch_steps)
        print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True)
        print('pretrain_loss: {:.6f}, train_loss: {:.6f}'.format(avg_pretrain_loss_value, avg_train_loss_value))
        # print('output layer: \n\t{}'.format(output_layer_value))

        #===================adv samples=====================
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        log_str = ''
        # cover all test data
        for i in range(train_params.test_epochs):
          test_batch = TIN_data.test.next_batch(train_params.test_batch_size)
          # if more GPUs available, generate testing adv samples at once
          if N_AUX_GPUS > 1:
            adv_images_dict = sess.run(attack_tensor_testing_dict, feed_dict ={x_sb: test_batch[0], 
                                                                               adv_noise: Noise_test, 
                                                                               mu_alpha: [train_params.fgsm_eps],
                                                                               keep_prob: 1.0})
          else:
            adv_images_dict = {}
          # test for each attack
          for atk in attacks_and_benign:
            if atk not in adv_acc_dict:
              adv_acc_dict[atk] = 0.0
              robust_adv_acc_dict[atk] = 0.0
              robust_adv_utility_dict[atk] = 0.0
            if atk == 'benign':
              testing_img = test_batch[0]
            elif attack_switch[atk]:
              # if only one gpu available, generate adv samples in-place
              if atk not in adv_images_dict:
                adv_images_dict[atk] = sess.run(attack_tensor_testing_dict[atk], feed_dict ={x_sb:test_batch[0], 
                                                                                             adv_noise: Noise_test, 
                                                                                             mu_alpha:[train_params.fgsm_eps],
                                                                                             keep_prob: 1.0})
              testing_img = adv_images_dict[atk]
            else:
              continue
            ### PixelDP Robustness ###
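            # Monte-Carlo certification: the model is evaluated num_samples
            # times under fresh noise draws, argmax votes are accumulated per
            # example in predictions_form_argmax, and robustness_size_argmax
            # below converts the vote counts into a certified radius that is
            # compared against fgsm_eps to decide whether the prediction
            # counts as robust.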
            predictions_form_argmax = np.zeros([train_params.test_batch_size, train_params.num_classes])
            softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: perturbFM_h, keep_prob: 1.0})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
            for n_draws in range(0, train_params.num_samples):
              _BenignLNoise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size)
              _perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), 
                                              train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters)
              _perturbFM_h = np.reshape(_perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters])
              for j in range(train_params.test_batch_size):
                pred = argmax_predictions[j]
                predictions_form_argmax[j, pred] += 1
              softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: (_BenignLNoise/10 + Noise), FM_h: perturbFM_h, keep_prob: 1.0}) * \
                sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: (_perturbFM_h/10 + perturbFM_h), keep_prob: 1.0})
              argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax
            is_correct = []
            is_robust = []
            for j in range(train_params.test_batch_size):
              is_correct.append(np.argmax(test_batch[1][j]) == np.argmax(final_predictions[j]))
              robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],
                                                                        eta=0.05, dp_attack_size=train_params.fgsm_eps, 
                                                                        dp_epsilon=train_params.dp_epsilon, dp_delta=0.05, 
                                                                        dp_mechanism='laplace') / dp_mult
              is_robust.append(robustness_from_argmax >= train_params.fgsm_eps)
            adv_acc_dict[atk] += np.sum(is_correct)*1.0/train_params.test_batch_size
            robust_adv_acc_dict[atk] += np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
            robust_adv_utility_dict[atk] += np.sum(is_robust)*1.0/train_params.test_batch_size
        ##############################
        # average all acc for whole test data
        for atk in attacks_and_benign:
          adv_acc_dict[atk] = adv_acc_dict[atk] / train_params.test_epochs
          robust_adv_acc_dict[atk] = robust_adv_acc_dict[atk] / train_params.test_epochs
          robust_adv_utility_dict[atk] = robust_adv_utility_dict[atk] / train_params.test_epochs
          # added robust prediction
          log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
        dt = time.time() - test_start
        print('testing time: {}'.format(dt))
        print(log_str, flush=True)
        print('*******************')
Example 21
def train_zero_knowledge_gandef_model(train_start=0,
                                      train_end=60000,
                                      test_start=0,
                                      test_end=10000,
                                      smoke_test=True,
                                      save=False,
                                      testing=False,
                                      backprop_through_attack=False,
                                      num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param train_batch_size: size of training batches
    :param test_batch_size: size of testing batches
    :param learning_rate: learning rate for training
    :param save: if true, the final model will be saved
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_fashion_mnist()
    if smoke_test:
        X_train, Y_train, X_test, Y_test = \
            X_train[:256], Y_train[:256], X_test[:256], Y_test[:256]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y_soft = tf.placeholder(tf.float32, shape=(None, 10))

    # Prepare optimizer
    learning_rate = 1e-4
    clf_opt = tf.train.AdamOptimizer(learning_rate)
    dic_opt = tf.train.AdamOptimizer(learning_rate * 10)

    # Train an MNIST model
    train_params = {
        'nb_epochs': 80,
        'batch_size': 128,
        'trade_off': 2,
        'inner_epochs': 1
    }
    rng = np.random.RandomState([2017, 8, 30])

    # Adversarial training
    print("Start adversarial training")
    zero_knowledge_gandef_model = make_zero_knowledge_gandef_model(
        name="model_zero_knowledge_gandef")
    aug_x = gaussian_augment(x, std=1)
    preds_clean = zero_knowledge_gandef_model(x)
    preds_aug = zero_knowledge_gandef_model(aug_x)

    def cross_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out
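
    # Extracting the logits from the Softmax op (instead of applying tf.log to
    # the probabilities) lets softmax_cross_entropy_with_logits and
    # sigmoid_cross_entropy_with_logits compute the loss in a numerically
    # stable way; cross_entropy above and sigmoid_entropy below share this trick.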

    def sigmoid_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    # Perform and evaluate adversarial training
    gan_train_v2(sess,
                 x,
                 y_soft,
                 preds_clean,
                 X_train,
                 Y_train,
                 loss_func=[cross_entropy, sigmoid_entropy],
                 optimizer=[clf_opt, dic_opt],
                 predictions_adv=preds_aug,
                 evaluate=None,
                 args=train_params,
                 rng=rng,
                 var_list=zero_knowledge_gandef_model.get_gan_params())

    # Evaluate the accuracy of the MNIST model on Clean examples
    preds_clean = zero_knowledge_gandef_model(x)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': True,
        'reject_threshold': 0.5
    }
    clean_acc = confident_model_eval(sess,
                                     x,
                                     y_soft,
                                     preds_clean,
                                     X_test,
                                     Y_test,
                                     args=eval_params)
    print('Test accuracy on Clean test examples: %0.4f\n' % clean_acc)
    report.adv_train_clean_eval = clean_acc

    # Evaluate the accuracy of the MNIST model on FGSM examples
    fgsm_params = {'eps': 0.6, 'clip_min': -1., 'clip_max': 1.}
    fgsm_att = FastGradientMethod(zero_knowledge_gandef_model, sess=sess)
    fgsm_adv = fgsm_att.generate(x, **fgsm_params)
    preds_fgsm_adv = zero_knowledge_gandef_model(fgsm_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    fgsm_acc = confident_model_eval(sess,
                                    x,
                                    y_soft,
                                    preds_fgsm_adv,
                                    X_test,
                                    Y_test,
                                    args=eval_params)
    print('Test accuracy on FGSM test examples: %0.4f\n' % fgsm_acc)
    report.adv_train_adv_eval = fgsm_acc

    # Evaluate the accuracy of the MNIST model on BIM examples
    bim_params = {'eps': 0.6, 'eps_iter': 0.1, 'clip_min': -1., 'clip_max': 1.}
    bim_att = BasicIterativeMethod(zero_knowledge_gandef_model, sess=sess)
    bim_adv = bim_att.generate(x, **bim_params)
    preds_bim_adv = zero_knowledge_gandef_model(bim_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    bim_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_bim_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on BIM test examples: %0.4f\n' % bim_acc)
    report.adv_train_adv_eval = bim_acc

    # Evaluate the accuracy of the MNIST model on PGD examples
    pgd_params = {
        'eps': 0.6,
        'eps_iter': 0.02,
        'nb_iter': 40,
        'clip_min': -1.,
        'clip_max': 1.,
        'rand_init': True
    }
    pgd_att = MadryEtAl(zero_knowledge_gandef_model, sess=sess)
    pgd_adv = pgd_att.generate(x, **pgd_params)
    preds_pgd_adv = zero_knowledge_gandef_model(pgd_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    pgd_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_pgd_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on PGD test examples: %0.4f\n' % pgd_acc)
    report.adv_train_adv_eval = pgd_acc

    # Save model
    if save:
        model_path = "models/zero_knowledge_gandef"
        vars_to_save = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='model_zero_knowledge_gandef*')
        assert len(vars_to_save) > 0
        saver = tf.train.Saver(var_list=vars_to_save)
        saver.save(sess, model_path)
        print('Model saved\n')
    else:
        print('Model not saved\n')
Example 22
wrap_model = Wrapper(base_model)
if mode == "1" or mode == "2":
    attack = CarliniWagnerL2(wrap_model, back="tf", sess=sess)
    gen = attack.generate(x,
                          confidence=confidence,
                          batch_size=batch_size,
                          learning_rate=learning_rate,
                          binary_search_steps=binary_search_steps,
                          max_iterations=max_iterations,
                          abort_early=True,
                          initial_const=initial_const,
                          clip_min=-0.5,
                          clip_max=0.5)
if mode == "3" or mode == "4" or mode == "5":
    attack = MadryEtAl(wrap_model, back="tf", sess=sess)
    gen = attack.generate(x, eps=epsilon, ord=ord, clip_min=-0.5, clip_max=0.5)

# Run Attack
X = data.train_data
X_adv = np.zeros(shape)
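# note: if `shape` has a nonzero leading dimension, the zeros allocated here
# end up prepended to the concatenated adversarial batches below; an empty
# leading dimension (e.g. shape = (0,) + X.shape[1:]) would avoid that.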
for i in range(0, X.shape[0], batch_size):
    x_adv = sess.run(gen, {x: X[i:i + batch_size]})
    X_adv = np.concatenate((X_adv, x_adv))
    print("train ", i)
np.save(dataset + "/train_" + mode, X_adv)

X = data.validation_data
X_adv = np.zeros(shape)
for i in range(0, X.shape[0], batch_size):
    x_adv = sess.run(gen, {x: X[i:i + batch_size]})
    X_adv = np.concatenate((X_adv, x_adv))