Ejemplo n.º 1
0
def main(cfg, *args):
    """Compare classifier accuracy on reconstructed vs. original test images.

    Builds a `DefenseGANBase` in test mode, obtains the reconstructed dataset
    from `reconstruct_dataset`, loads the pretrained classifier into the
    GAN's session and prints the accuracy on the first batch of the
    reconstructed images (`Acc1`) and of the original images (`Acc2`).

    Args:
        cfg: Configuration object forwarded to `DefenseGANBase`.
        *args: Ignored.
    """
    batch_size = 64

    gan = DefenseGANBase(cfg=cfg, test_mode=True)
    data_dict = reconstruct_dataset(gan_model=gan)
    images_rec, labels, images_orig = data_dict['test']

    # load pretrained classifier
    model = Model('classifiers/model/', tiny=False, mode='eval', sess=gan.sess)

    def first_batch_accuracy(images):
        # Only the first `batch_size` samples are evaluated, always against
        # the same slice of labels.
        return gan.sess.run(model.accuracy,
                            feed_dict={
                                model.x_input: images[:batch_size],
                                model.y_input: labels[:batch_size]
                            })

    acc1 = first_batch_accuracy(images_rec)
    acc2 = first_batch_accuracy(images_orig)
    print('Acc1: {}'.format(acc1))
    print('Acc2: {}'.format(acc2))
Ejemplo n.º 2
0
def main(cfg, *args):
    """Evaluate a classifier on clean test images with GAN reconstruction.

    For `mnist` / `f-mnist` a fresh `model_a` classifier is trained; for
    `cifar-10` a pretrained model is loaded and wrapped. The reconstruction
    model of the GAN is then attached to the classifier and the result is
    evaluated with `model_eval_gan`. Accuracy and MSE are appended to
    `acc.txt` / `mse.txt` under `results/clean/<dataset>` and the ROC
    information is pickled next to them.

    Args:
        cfg: Configuration forwarded to `gan_from_config`.
        *args: Ignored.

    Returns:
        The list `[acc, mse]` as produced by `model_eval_gan`.

    Raises:
        NotImplementedError: For the `celeba` dataset.
        ValueError: For any other unsupported dataset name.
    """
    FLAGS = tf.app.flags.FLAGS

    rng = np.random.RandomState([11, 24, 1990])
    tf.set_random_seed(11241990)

    gan = gan_from_config(cfg, True)

    results_dir = 'results/clean/{}'.format(gan.dataset_name)
    ensure_dir(results_dir)

    sess = gan.sess
    gan.load_model()

    # use test split
    train_images, train_labels, test_images, test_labels = get_cached_gan_data(
        gan, test_on_dev=False, orig_data_flag=True)

    image_dims = list(train_images.shape[1:])
    nb_classes = train_labels.shape[1]

    # The classifier is declared with a free batch dimension, while the
    # placeholders are pinned to BATCH_SIZE.
    x_shape = [None] + image_dims
    images_pl = tf.placeholder(tf.float32, shape=[BATCH_SIZE] + image_dims)
    labels_pl = tf.placeholder(tf.float32, shape=[BATCH_SIZE] + [nb_classes])

    if FLAGS.num_tests > 0:
        test_images = test_images[:FLAGS.num_tests]
        test_labels = test_labels[:FLAGS.num_tests]

    if FLAGS.num_train > 0:
        train_images = train_images[:FLAGS.num_train]
        train_labels = train_labels[:FLAGS.num_train]

    train_params = {
        'nb_epochs': 10,
        'batch_size': BATCH_SIZE,
        'learning_rate': 0.001
    }

    eval_params = {'batch_size': BATCH_SIZE}

    # train classifier for mnist, fmnist
    if gan.dataset_name in ['mnist', 'f-mnist']:
        model = model_a(input_shape=x_shape, nb_classes=nb_classes)
        preds_train = model.get_logits(images_pl, dropout=True)

        model_train(sess,
                    images_pl,
                    labels_pl,
                    preds_train,
                    train_images,
                    train_labels,
                    args=train_params,
                    rng=rng,
                    init_all=False)

    elif gan.dataset_name == 'cifar-10':
        pre_model = Model('classifiers/model/',
                          tiny=False,
                          mode='eval',
                          sess=sess)
        model = DefenseWrapper(pre_model, 'logits')

    elif gan.dataset_name == 'celeba':
        # TODO
        raise NotImplementedError

    else:
        # Without this branch an unknown dataset would only surface later as
        # a NameError on `model`.
        raise ValueError('Unsupported dataset: {}'.format(gan.dataset_name))

    model.add_rec_model(gan, batch_size=BATCH_SIZE)
    preds_eval = model.get_logits(images_pl)

    # calculate norms
    num_dims = len(images_pl.get_shape())
    avg_inds = list(range(1, num_dims))
    reconstruct = gan.reconstruct(images_pl, batch_size=BATCH_SIZE)

    # We use L2 loss for GD steps
    diff_op = tf.reduce_mean(tf.square(reconstruct - images_pl), axis=avg_inds)

    acc, mse, roc_info = model_eval_gan(sess,
                                        images_pl,
                                        labels_pl,
                                        preds_eval,
                                        None,
                                        test_images=test_images,
                                        test_labels=test_labels,
                                        args=eval_params,
                                        diff_op=diff_op)

    # Logging: one line per run, appended so that sweeps accumulate.
    run_tag = 'lr_{}_iters_{}'.format(gan.rec_lr, gan.rec_iters)
    for log_name, value in (('acc.txt', acc), ('mse.txt', mse)):
        with open(os.path.join(results_dir, log_name), 'a+') as logfile:
            logfile.write('{}, {}\n'.format(run_tag, value))

    pickle_filename = os.path.join(
        results_dir, 'roc_lr_{}_iters_{}.pkl'.format(gan.rec_lr,
                                                     gan.rec_iters))
    # HIGHEST_PROTOCOL is a binary pickle protocol, so the file must be
    # opened in binary mode.
    with open(pickle_filename, 'wb') as f:
        cPickle.dump(roc_info, f, cPickle.HIGHEST_PROTOCOL)
        print('[*] saved roc_info in {}'.format(pickle_filename))

    return [acc, mse]
Ejemplo n.º 3
0
def whitebox(gan,
             rec_data_path=None,
             batch_size=128,
             learning_rate=0.001,
             nb_epochs=10,
             eps=0.0627,
             online_training=False,
             test_on_dev=False,
             attack_type='fgsm',
             defense_type='gan',
             num_tests=-1,
             num_train=-1):
    """Run a white-box attack against the pre-trained classifier.

    Based on MNIST tutorial from cleverhans.

    The early session selection and the `'none'` early return use the
    `defense_type` / `attack_type` arguments, whereas the attack
    construction reads `FLAGS.attack_type` and `FLAGS.defense_type`;
    callers should keep the arguments and the flags in agreement.

    Args:
         gan: A `GAN` model.
         rec_data_path: A string to the directory. Not used in this function.
         batch_size: The size of the batch.
         learning_rate: The learning rate for training the target models.
            Not used in this function (the classifier is pre-trained).
         nb_epochs: Number of epochs for training the target model. Not used
            in this function.
         eps: The epsilon of FGSM.
         online_training: Training Defense-GAN with online reconstruction. The
            faster but less accurate way is to reconstruct the dataset once and use
            it to train the target models with:
            `python train.py --cfg <path-to-model> --save_recs`
            Not used in this function.
         test_on_dev: Forwarded to `get_cached_gan_data`.
         attack_type: Type of the white-box attack. It can be `fgsm`,
            `rand+fgsm`, or `cw`; `none` returns the clean accuracy.
         defense_type: String representing the type of defense. Can be `none`,
            `defense_gan`, or `adv_tr`.
         num_tests: If positive, only the first `num_tests` test samples are
            used.
         num_train: If positive, only the first `num_train` training samples
            are used.

    Returns:
        A tuple `(accuracy, 0, roc_info)`. `accuracy` is the clean test
        accuracy when `attack_type == 'none'` and the accuracy on adversarial
        examples otherwise; `roc_info` is `None` unless
        `FLAGS.defense_type == 'defense_gan'`.

    Raises:
        ValueError: If `FLAGS.attack_type` is neither an FGSM variant nor
            `cw`.
    """

    FLAGS = tf.flags.FLAGS

    tf.set_random_seed(11241990)

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    train_images, train_labels, test_images, test_labels = get_cached_gan_data(
        gan, test_on_dev, orig_data_flag=True)

    if defense_type == 'defense_gan':
        assert gan is not None
        sess = gan.sess
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(train_images.shape[1:]))
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    if num_tests > 0:
        test_images = test_images[:num_tests]
        test_labels = test_labels[:num_tests]

    if num_train > 0:
        train_images = train_images[:num_train]
        train_labels = train_labels[:num_train]

    # load and wrap pre-trained model
    pre_model = Model('classifiers/model/', tiny=False, mode='eval', sess=sess)
    model = DefenseWrapper(pre_model, 'logits')

    preds = model.get_logits(images_pl)

    # Evaluate pre-trained model
    eval_params = {'batch_size': batch_size}
    train_acc = model_eval(sess,
                           images_pl,
                           labels_pl,
                           preds,
                           train_images,
                           train_labels,
                           args=eval_params)
    eval_acc = model_eval(sess,
                          images_pl,
                          labels_pl,
                          preds,
                          test_images,
                          test_labels,
                          args=eval_params)

    print('[#] Train acc: {}'.format(train_acc))
    print('[#] Eval acc: {}'.format(eval_acc))

    if attack_type == 'none':
        return eval_acc, 0, None

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph.

    if FLAGS.defense_type == 'defense_gan':
        model.add_rec_model(gan, batch_size)

    # celeba and cifar-10 are clipped to [-1, 1], everything else to [0, 1].
    min_val = 0.0
    if gan:
        if gan.dataset_name == 'celeba' or gan.dataset_name == 'cifar-10':
            min_val = -1.0

    if 'rand' in FLAGS.attack_type:
        # NOTE(review): `args` is not defined inside this function; unless a
        # module-level `args` exists, this branch raises NameError. Confirm
        # where `alpha` is supposed to come from.
        test_images = np.clip(
            test_images +
            args.alpha * np.sign(np.random.randn(*test_images.shape)), min_val,
            1.0)
        eps -= args.alpha

    if 'fgsm' in FLAGS.attack_type:
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
        attack_obj = FastGradientMethod(model, sess=sess)
    elif FLAGS.attack_type == 'cw':
        attack_obj = CarliniWagnerL2(model, sess=sess)
        attack_iterations = 100
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 10.0,
            'batch_size': batch_size,
            'initial_const': 100
        }
    else:
        # Fail with a clear message instead of a NameError on `attack_obj`.
        raise ValueError('Unsupported attack type: {}'.format(
            FLAGS.attack_type))

    adv_x = attack_obj.generate(images_pl, **attack_params)

    eval_par = {'batch_size': batch_size}
    if FLAGS.defense_type == 'defense_gan':
        num_dims = len(images_pl.get_shape())
        avg_inds = list(range(1, num_dims))

        preds_adv = model.get_probs(adv_x)
        # Per-sample mean squared perturbation, averaged over all
        # non-batch axes.
        diff_op = tf.reduce_mean(tf.square(adv_x - images_pl), axis=avg_inds)
        acc_adv, roc_info = model_eval_gan(sess,
                                           images_pl,
                                           labels_pl,
                                           preds_adv,
                                           None,
                                           test_images=test_images,
                                           test_labels=test_labels,
                                           args=eval_par,
                                           diff_op=diff_op)

        print('Training accuracy: {}'.format(train_acc))
        print('Evaluation accuracy: {}'.format(eval_acc))
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)
        return acc_adv, 0, roc_info
    else:
        preds_adv = model(adv_x)
        acc_adv = model_eval(sess,
                             images_pl,
                             labels_pl,
                             preds_adv,
                             test_images,
                             test_labels,
                             args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

        return acc_adv, 0, None
Ejemplo n.º 4
0
def whitebox(gan,
             rec_data_path=None,
             batch_size=128,
             learning_rate=0.001,
             nb_epochs=10,
             eps=0.3,
             online_training=False,
             test_on_dev=False,
             attack_type='fgsm',
             defense_type='gan',
             num_tests=-1,
             num_train=-1,
             cfg=None):
    """Train or load a classifier and evaluate it under a white-box attack.

    Based on MNIST tutorial from cleverhans.

    The BPDA set-up and the `attack_type is None` early return use the
    `attack_type` argument, whereas the attack construction and the defense
    branch read `FLAGS.attack_type` / `FLAGS.defense_type`; callers should
    keep the arguments and the flags in agreement.

    Args:
         gan: A `GAN` model. Its session is used for all computation.
         rec_data_path: A string to the directory. Not used in this function.
         batch_size: The size of the batch.
         learning_rate: The learning rate for training the target models.
         nb_epochs: Number of epochs for training the target model.
         eps: The epsilon of FGSM. The passed value is overridden by
            `attack_config_dict[gan.dataset_name]['eps']`.
         online_training: Training Defense-GAN with online reconstruction. The
            faster but less accurate way is to reconstruct the dataset once and use
            it to train the target models with:
            `python train.py --cfg <path-to-model> --save_recs`
            Not used in this function.
         test_on_dev: Forwarded to `get_cached_gan_data`.
         attack_type: Type of the white-box attack. It can be `fgsm`,
            `rand+fgsm`, `cw`, `madry`, `bpda`, `bpda-pgd` or `bpda-l2`.
            `None` returns right after the clean evaluation.
         defense_type: String representing the type of defense. Can be `none`,
            `defense_gan`, or `adv_tr`.
         num_tests: If positive, only the first `num_tests` test samples are
            used.
         num_train: If positive, only the first `num_train` training samples
            are used.
         cfg: Configuration mapping; required for the `bpda` / `bpda-pgd`
            attacks, which read the encoder/generator paths from it.

    Returns:
        `(eval_acc, 0, None)` when `attack_type` is `None`. Otherwise a dict
        with the keys `acc_adv`, `acc_rec`, `roc_info_adv` and
        `roc_info_rec`; the last three are `0` / `None` unless
        `FLAGS.defense_type == 'defense_gan'`.

    Raises:
        ValueError: If the dataset or `FLAGS.attack_type` is not supported.
        RuntimeError: If the classifier could not be loaded and the dataset
            has no training graph (cifar-10).
    """

    FLAGS = tf.flags.FLAGS
    rng = np.random.RandomState([11, 24, 1990])

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    ### Attack parameters
    eps = attack_config_dict[gan.dataset_name]['eps']
    min_val = attack_config_dict[gan.dataset_name]['clip_min']
    attack_iterations = FLAGS.attack_iters
    search_steps = FLAGS.search_steps

    train_images, train_labels, test_images, test_labels = get_cached_gan_data(
        gan, test_on_dev, orig_data_flag=True)

    # The GAN's session is used regardless of the defense type.
    sess = gan.sess

    # Classifier is trained on either original data or reconstructed data.
    # During testing, the input image will be reconstructed by GAN.
    # Therefore, we use rec_test_images as input to the classifier.
    # When evaluating defense_gan with attack, input should be test_images.

    x_shape = [None] + list(train_images.shape[1:])
    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(train_images.shape[1:]))
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    if num_tests > 0:
        test_images = test_images[:num_tests]
        test_labels = test_labels[:num_tests]

    if num_train > 0:
        train_images = train_images[:num_train]
        train_labels = train_labels[:num_train]

    # Creating classification model

    # Stays None for datasets that only provide a pre-trained classifier.
    preds_train = None

    if gan.dataset_name in ['mnist', 'f-mnist']:
        images_pl_transformed = images_pl
        models = {
            'A': model_a,
            'B': model_b,
            'C': model_c,
            'D': model_d,
            'E': model_e,
            'F': model_f
        }

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            model = models[FLAGS.model](input_shape=x_shape,
                                        nb_classes=train_labels.shape[1])

        used_vars = model.get_params()
        preds_train = model.get_logits(images_pl_transformed, dropout=True)
        preds_eval = model.get_logits(images_pl_transformed)

    elif gan.dataset_name == 'cifar-10':
        images_pl_transformed = images_pl
        pre_model = Model('classifiers/model/cifar-10',
                          tiny=False,
                          mode='eval',
                          sess=sess)
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            model = DefenseWrapper(pre_model, 'logits')

        used_vars = [
            x for x in tf.global_variables() if x.name.startswith('model')
        ]
        preds_eval = model.get_logits(images_pl_transformed)

    elif gan.dataset_name == 'celeba':
        # Rescale from [0, 255] to [-1, 1].
        images_pl_transformed = tf.cast(images_pl, tf.float32) / 255. * 2. - 1.
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            model = model_y(input_shape=x_shape,
                            nb_classes=train_labels.shape[1])

        used_vars = model.get_params()
        preds_train = model.get_logits(images_pl_transformed, dropout=True)
        preds_eval = model.get_logits(images_pl_transformed)

    else:
        raise ValueError('Unsupported dataset: {}'.format(gan.dataset_name))

    # Creating BPDA model
    if attack_type in ['bpda', 'bpda-pgd']:
        gan_bpda = InvertorDefenseGAN(get_generator_fn(cfg['DATASET_NAME'],
                                                       cfg['USE_RESBLOCK']),
                                      cfg=cfg,
                                      test_mode=True)
        gan_bpda.checkpoint_dir = cfg['BPDA_ENCODER_CP_PATH']
        gan_bpda.generator_init_path = cfg['BPDA_GENERATOR_INIT_PATH']
        gan_bpda.active_sess = sess
        gan_bpda.load_model()

        if gan.dataset_name in ['mnist', 'f-mnist']:
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                attack_model = models[FLAGS.model](
                    input_shape=x_shape, nb_classes=train_labels.shape[1])
            attack_used_vars = attack_model.get_params()
        elif gan.dataset_name == 'cifar-10':
            pre_model_attack = Model('classifiers/model/cifar-10',
                                     tiny=False,
                                     mode='eval',
                                     sess=sess)
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                attack_model = DefenseWrapper(pre_model_attack, 'logits')
            attack_used_vars = [
                x for x in tf.global_variables() if x.name.startswith('model')
            ]
        elif gan.dataset_name == 'celeba':
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                attack_model = model_y(input_shape=x_shape,
                                       nb_classes=train_labels.shape[1])
            attack_used_vars = attack_model.get_params()

    report = AccuracyReport()

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test
        # examples.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         images_pl,
                         labels_pl,
                         preds_eval,
                         test_images,
                         test_labels,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy: %0.4f' % acc)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': 'classifiers/model/{}'.format(gan.dataset_name),
        'filename': 'model_{}'.format(FLAGS.model)
    }

    preds_adv = None
    if FLAGS.defense_type == 'adv_tr':
        attack_params = {
            'eps': FLAGS.fgsm_eps_tr,
            'clip_min': 0.,
            'clip_max': 1.
        }
        if gan:
            if gan.dataset_name == 'celeba':
                attack_params['clip_min'] = -1.0

        attack_obj = FastGradientMethod(model, sess=sess)
        adv_x_tr = attack_obj.generate(images_pl_transformed, **attack_params)
        adv_x_tr = tf.stop_gradient(adv_x_tr)
        preds_adv = model(adv_x_tr)

    classifier_load_success = False
    if FLAGS.load_classifier:
        try:
            path = tf.train.latest_checkpoint('classifiers/model/{}'.format(
                gan.dataset_name))
            saver = tf.train.Saver(var_list=used_vars)
            saver.restore(sess, path)
            print('[+] Classifier loaded successfully ...')
            classifier_load_success = True
        except Exception:
            # Loading is best effort: any failure falls back to training.
            # `Exception` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            print('[-] Cannot load classifier ...')
            classifier_load_success = False

    if not classifier_load_success:
        if preds_train is None:
            # Previously this path died with a NameError on `preds_train`.
            raise RuntimeError(
                'Classifier for {} could not be loaded and no training graph '
                'is available for this dataset.'.format(gan.dataset_name))
        print('[+] Training classifier model ...')
        model_train(sess,
                    images_pl,
                    labels_pl,
                    preds_train,
                    train_images,
                    train_labels,
                    args=train_params,
                    rng=rng,
                    predictions_adv=preds_adv,
                    init_all=False,
                    evaluate=evaluate,
                    save=False)

    if attack_type in ['bpda', 'bpda-pgd']:
        # Initialize attack model weights with trained model
        path = tf.train.latest_checkpoint('classifiers/model/{}'.format(
            gan.dataset_name))
        saver = tf.train.Saver(var_list=attack_used_vars)
        saver.restore(sess, path)
        print('[+] Attack model initialized successfully ...')

        # Add self.enc_reconstruction
        # Only auto-encodes to reconstruct. No GD is performed
        attack_model.add_rec_model(gan_bpda, batch_size, ae_flag=True)

    eval_params = {'batch_size': batch_size}

    # Evaluate trained model on the clean test set.
    eval_acc = model_eval(sess,
                          images_pl,
                          labels_pl,
                          preds_eval,
                          test_images,
                          test_labels,
                          args=eval_params)
    print('[#] Eval acc: {}'.format(eval_acc))

    reconstructor = get_reconstructor(gan)

    if attack_type is None:
        return eval_acc, 0, None

    if 'rand' in FLAGS.attack_type:
        # NOTE(review): `args` is not defined inside this function; unless a
        # module-level `args` exists, this branch raises NameError. Confirm
        # where `alpha` is supposed to come from.
        test_images = np.clip(
            test_images +
            args.alpha * np.sign(np.random.randn(*test_images.shape)), min_val,
            1.0)
        eps -= args.alpha

    if 'fgsm' in FLAGS.attack_type:
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
        attack_obj = FastGradientMethod(model, sess=sess)
    elif FLAGS.attack_type == 'cw':
        attack_obj = CarliniWagnerL2(model, sess=sess)
        attack_params = {
            'binary_search_steps': 6,
            'max_iterations': attack_iterations,
            'learning_rate': 0.2,
            'batch_size': batch_size,
            'clip_min': min_val,
            'clip_max': 1.,
            'initial_const': 10.0
        }

    elif FLAGS.attack_type == 'madry':
        attack_obj = MadryEtAl(model, sess=sess)
        attack_params = {
            'eps': eps,
            'eps_iter': eps / 4.0,
            'clip_min': min_val,
            'clip_max': 1.,
            'ord': np.inf,
            'nb_iter': attack_iterations
        }

    elif FLAGS.attack_type == 'bpda':
        # BPDA + FGSM
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
        attack_obj = FastGradientMethod(attack_model, sess=sess)

    elif FLAGS.attack_type == 'bpda-pgd':
        # BPDA + PGD
        attack_params = {
            'eps': eps,
            'eps_iter': eps / 4.0,
            'clip_min': min_val,
            'clip_max': 1.,
            'ord': np.inf,
            'nb_iter': attack_iterations
        }
        attack_obj = MadryEtAl(attack_model, sess=sess)

    elif FLAGS.attack_type == 'bpda-l2':
        # default: lr=1.0, c=0.1
        attack_obj = BPDAL2(model, reconstructor, sess=sess)
        attack_params = {
            'binary_search_steps': search_steps,
            'max_iterations': attack_iterations,
            'learning_rate': 0.2,
            'batch_size': batch_size,
            'clip_min': min_val,
            'clip_max': 1.,
            'initial_const': 10.0
        }

    else:
        # Fail with a clear message instead of a NameError on `attack_obj`.
        raise ValueError('Unsupported attack type: {}'.format(
            FLAGS.attack_type))

    adv_x = attack_obj.generate(images_pl_transformed, **attack_params)

    if FLAGS.defense_type == 'defense_gan':

        recons_adv, zs = reconstructor.reconstruct(adv_x,
                                                   batch_size=batch_size,
                                                   reconstructor_id=123)

        preds_adv = model.get_logits(recons_adv)

        sess.run(tf.local_variables_initializer())

        diff_op = get_diff_op(model, adv_x, recons_adv, FLAGS.detect_image)
        z_norm = tf.reduce_sum(tf.square(zs), axis=1)

        acc_adv, diffs_mean, roc_info_adv = model_eval_gan(
            sess,
            images_pl,
            labels_pl,
            preds_adv,
            None,
            test_images=test_images,
            test_labels=test_labels,
            args=eval_params,
            diff_op=diff_op,
            z_norm=z_norm,
            recons_adv=recons_adv,
            adv_x=adv_x,
            debug=FLAGS.debug,
            vis_dir=_get_vis_dir(gan, FLAGS.attack_type))

        # reconstruction on clean images
        recons_clean, zs = reconstructor.reconstruct(images_pl_transformed,
                                                     batch_size=batch_size)
        preds_eval = model.get_logits(recons_clean)

        sess.run(tf.local_variables_initializer())

        diff_op = get_diff_op(model, images_pl_transformed, recons_clean,
                              FLAGS.detect_image)
        z_norm = tf.reduce_sum(tf.square(zs), axis=1)

        acc_rec, diffs_mean_rec, roc_info_rec = model_eval_gan(
            sess,
            images_pl,
            labels_pl,
            preds_eval,
            None,
            test_images=test_images,
            test_labels=test_labels,
            args=eval_params,
            diff_op=diff_op,
            z_norm=z_norm,
            recons_adv=recons_clean,
            adv_x=images_pl,
            debug=FLAGS.debug,
            vis_dir=_get_vis_dir(gan, 'clean'))

        print('Evaluation accuracy: {}'.format(eval_acc))
        print('Evaluation accuracy with reconstruction: {}'.format(acc_rec))
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

        return {
            'acc_adv': acc_adv,
            'acc_rec': acc_rec,
            'roc_info_adv': roc_info_adv,
            'roc_info_rec': roc_info_rec
        }
    else:
        preds_adv = model.get_logits(adv_x)
        sess.run(tf.local_variables_initializer())
        acc_adv = model_eval(sess,
                             images_pl,
                             labels_pl,
                             preds_adv,
                             test_images,
                             test_labels,
                             args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

        return {
            'acc_adv': acc_adv,
            'acc_rec': 0,
            'roc_info_adv': None,
            'roc_info_rec': None
        }
Ejemplo n.º 5
0
def prep_bbox(sess, images, labels, images_train, labels_train, images_test,
              labels_test, nb_epochs, batch_size, learning_rate, rng, gan=None,
              adv_training=False, cnn_arch=None):
    """Defines and trains a model that simulates the "remote"
    black-box oracle described in https://arxiv.org/abs/1602.02697.

    The model is first loaded from the latest checkpoint when
    `FLAGS.load_bb_model` is set; if that is disabled or fails, it is
    trained from scratch.

    Args:
        sess: the TF session
        images: the input placeholder
        labels: the output placeholder
        images_train: the training data for the oracle
        labels_train: the training labels for the oracle
        images_test: the testing data for the oracle
        labels_test: the testing labels for the oracle
        nb_epochs: number of epochs to train model
        batch_size: size of training batches
        learning_rate: learning rate for training
        rng: numpy.random.RandomState
        gan: the GAN model; only its `dataset_name` is read here. Required
            despite the `None` default.
        adv_training: not used in this function
        cnn_arch: the classifier to use as the black-box model (replaced by
            the wrapped pre-trained model for cifar-10)

    Returns:
        model: The blackbox model function.
        predictions: The predictions tensor.
        accuracy: Accuracy of the model.

    Raises:
        ValueError: If `gan` is None or its dataset is not supported.
        RuntimeError: If the model could not be loaded and the dataset has
            no training graph (cifar-10).
    """
    if gan is None:
        # `gan.dataset_name` is needed below; fail early with a clear message.
        raise ValueError('prep_bbox requires a `gan` with a dataset_name.')

    # Define TF model graph (for the black-box model).
    model = cnn_arch
    model_dir = 'classifiers/model/{}'.format(gan.dataset_name)
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': model_dir,
        'filename': 'model_{}'.format(FLAGS.bb_model)
    }
    eval_params = {'batch_size': batch_size}

    # Stays None for datasets that only provide a pre-trained classifier.
    pred_train = None

    if gan.dataset_name in ['mnist', 'f-mnist']:
        used_vars = model.get_params()
        pred_train = model.get_logits(images, dropout=True)
        pred_eval = model.get_logits(images)

    elif gan.dataset_name == 'cifar-10':
        pre_model = Model('classifiers/model/cifar-10', tiny=False,
                          mode='eval', sess=sess)
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            model = DefenseWrapper(pre_model, 'logits')
        used_vars = [
            x for x in tf.global_variables() if x.name.startswith('model')
        ]
        pred_eval = model.get_logits(images)

    elif gan.dataset_name == 'celeba':
        # Rescale from [0, 255] to [-1, 1].
        images_pl_transformed = tf.cast(images, tf.float32) / 255. * 2. - 1.
        used_vars = model.get_params()
        pred_train = model.get_logits(images_pl_transformed, dropout=True)
        pred_eval = model.get_logits(images_pl_transformed)

    else:
        raise ValueError('Unsupported dataset: {}'.format(gan.dataset_name))

    classifier_load_success = False
    if FLAGS.load_bb_model:
        try:
            path = tf.train.latest_checkpoint(model_dir)
            saver = tf.train.Saver(var_list=used_vars)
            saver.restore(sess, path)
            print('[+] BB model loaded successfully ...')
            classifier_load_success = True
        except Exception:
            # Loading is best effort: any failure falls back to training.
            # `Exception` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            print('[-] Fail to load BB model ...')
            classifier_load_success = False

    if not classifier_load_success:
        if pred_train is None:
            # Previously this path died with a NameError on `pred_train`.
            raise RuntimeError(
                'BB model for {} could not be loaded and no training graph '
                'is available for this dataset.'.format(gan.dataset_name))
        print('[+] Training classifier model ...')
        model_train(sess, images, labels, pred_train, images_train,
                    labels_train, args=train_params, rng=rng,
                    predictions_adv=None, init_all=False, save=False)

    # Print out the accuracy on legitimate test data.
    accuracy = model_eval(
        sess, images, labels, pred_eval, images_test,
        labels_test, args=eval_params,
    )

    print('Test accuracy of black-box on legitimate test examples: ' +
          str(accuracy))

    return model, pred_eval, accuracy
Ejemplo n.º 6
0
def main(cfg, *args):
    """Measure classifier accuracy with and without GAN reconstruction.

    For `mnist` / `f-mnist` a `model_a` classifier is trained; for
    `cifar-10` a pretrained model is loaded and wrapped. The classifier is
    evaluated on the training data and on the dev split, then the GAN's
    reconstruction model is attached and the dev split is evaluated again.
    Each of the three accuracies is appended to its own log file under
    `results/sweep/<dataset>`.

    Args:
        cfg: Configuration forwarded to `gan_from_config`.
        *args: Ignored.

    Returns:
        The list `[train_acc, eval_acc, eval_rec_acc]`.

    Raises:
        ValueError: If the dataset is not one of `mnist`, `f-mnist` or
            `cifar-10`.
    """
    FLAGS = tf.app.flags.FLAGS

    rng = np.random.RandomState([11, 24, 1990])

    gan = gan_from_config(cfg, True)

    results_dir = 'results/sweep/{}'.format(gan.dataset_name)
    ensure_dir(results_dir)

    sess = gan.sess
    gan.load_model()

    # Evaluate on dev set
    train_images, train_labels, test_images, test_labels = get_cached_gan_data(
        gan, test_on_dev=True, orig_data_flag=True)

    x_shape = [None] + list(train_images.shape[1:])
    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(train_images.shape[1:]))
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    train_params = {'nb_epochs': 10, 'batch_size': 128, 'learning_rate': 0.001}

    eval_params = {'batch_size': 128}

    # train classifier for mnist, fmnist
    if gan.dataset_name in ['mnist', 'f-mnist']:
        model = model_a(input_shape=x_shape, nb_classes=train_labels.shape[1])
        preds_train = model.get_logits(images_pl, dropout=True)
        preds_eval = model.get_logits(images_pl)

        tf.set_random_seed(11241990)

        model_train(sess,
                    images_pl,
                    labels_pl,
                    preds_train,
                    train_images,
                    train_labels,
                    args=train_params,
                    rng=rng,
                    init_all=False)

    elif gan.dataset_name == 'cifar-10':
        pre_model = Model('classifiers/model/',
                          tiny=False,
                          mode='eval',
                          sess=sess)
        model = DefenseWrapper(pre_model, 'logits')

        preds_eval = model.get_logits(images_pl)

    else:
        # Without this branch an unknown dataset would only surface later as
        # a NameError on `preds_eval`.
        raise ValueError('Unsupported dataset: {}'.format(gan.dataset_name))

    train_acc = model_eval(sess,
                           images_pl,
                           labels_pl,
                           preds_eval,
                           train_images,
                           train_labels,
                           args=eval_params)
    eval_acc = model_eval(sess,
                          images_pl,
                          labels_pl,
                          preds_eval,
                          test_images,
                          test_labels,
                          args=eval_params)

    # Attach the reconstruction model and rebuild the logits so that the
    # next evaluation goes through it.
    model.add_rec_model(gan, batch_size=128)
    preds_eval = model.get_logits(images_pl)
    tf.set_random_seed(11241990)
    sess.run(tf.local_variables_initializer())

    eval_rec_acc = model_eval(sess,
                              images_pl,
                              labels_pl,
                              preds_eval,
                              test_images,
                              test_labels,
                              args=eval_params)

    # Logging: one line per run, appended so that sweeps accumulate.
    run_tag = 'iters_{}_lr_{}'.format(gan.rec_iters, gan.rec_lr)
    log_entries = (
        ('acc_train.txt', train_acc),
        ('acc_eval.txt', eval_acc),
        ('acc_eval_rec.txt', eval_rec_acc),
    )
    for log_name, value in log_entries:
        with open(os.path.join(results_dir, log_name), 'a+') as logfile:
            logfile.write('{}, {:6f}\n'.format(run_tag, value))

    return [train_acc, eval_acc, eval_rec_acc]