    def _get_pert(self, X, Y, eps: float, model):
        x = tf.placeholder(tf.float32, shape=([None] + list(self.n_features)))
        y = tf.placeholder(tf.float32, shape=(None, self.n_classes))

        wrap = KerasModelWrapper(model)
        pgd = ProjectedGradientDescent(wrap, ord=self.ord, sess=self.sess)
        if eps >= 0.05:
            adv_x = pgd.generate(x, y=y, eps=eps)
        else:
            # keep the per-iteration step within the small budget
            # (the default eps_iter would otherwise exceed eps)
            adv_x = pgd.generate(x, y=y, eps=eps, eps_iter=eps)
        adv_x = tf.stop_gradient(adv_x)
        ret = adv_x - x
        return ret.eval(feed_dict={x: X, y: Y}, session=self.sess)
class PGDAttack(AdversarialAttack):
    def __init__(self,
                 model,
                 targeted=False,
                 step_size_iter=0.05,
                 max_perturbation=0.3,
                 n_iterations=10,
                 norm_order=np.inf,
                 rand_init=None,
                 rand_minmax=0.3,
                 clip_min=None,
                 clip_max=None,
                 sanity_checks=True):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._n_iterations = n_iterations
        self._norm_order = norm_order
        self._rand_init = rand_init
        self._rand_minmax = rand_minmax
        self._sanity_checks = sanity_checks

        with self.graph.as_default():
            self._method = ProjectedGradientDescent(
                self._model,
                sess=self.session,
                eps=self._max_perturbation,
                eps_iter=self._step_size_iter,
                nb_iter=self._n_iterations,
                ord=self._norm_order,
                rand_init=self._rand_init,
                clip_min=self._clip_min,
                clip_max=self._clip_max,
                sanity_checks=self._sanity_checks)

    def attack_method(self, labels):
        if labels is not None:
            if self._targeted:
                return self._method.generate(x=self._x_clean,
                                             y_target=labels,
                                             rand_minmax=self._rand_minmax)
            else:
                return self._method.generate(x=self._x_clean,
                                             y=labels,
                                             rand_minmax=self._rand_minmax)
        return self._method.generate(x=self._x_clean,
                                     rand_minmax=self._rand_minmax)
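
A minimal usage sketch for the PGDAttack wrapper above, assuming the AdversarialAttack base class provides graph, session and _x_clean as the snippet implies; cleverhans_model and one_hot_labels are placeholder names, not part of the original example:

# Hedged sketch: `cleverhans_model` (a cleverhans Model wrapper) and
# `one_hot_labels` (a one-hot tf.Tensor) are assumed to be defined elsewhere.
attack = PGDAttack(model=cleverhans_model,
                   targeted=False,
                   step_size_iter=0.01,
                   max_perturbation=0.3,
                   n_iterations=40,
                   clip_min=0.0,
                   clip_max=1.0)
adv_x_op = attack.attack_method(one_hot_labels)  # TF op producing adversarial examples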
Example #3
def get_at_loss(sess, x, y, model, eps, eps_iter, iterations):
    # Set up PGD attack graph using Cleverhans library

    pgd_params = {
        'ord': np.inf,
        'y': y,
        'eps': eps / 255,
        'eps_iter': eps_iter / 255,
        'nb_iter': iterations,
        'rand_init': True,
        'rand_minmax': eps / 255,
        'clip_min': 0.,
        'clip_max': 1.,
        'sanity_checks': True
    }

    pgd = ProjectedGradientDescent(model, sess=sess)
    adv_x = pgd.generate(x, **pgd_params)
    adv_logits = model.get_logits(adv_x)

    # Add summary for adversarial training images
    with tf.device('/gpu:0'):
        with tf.name_scope('Adversarial-Image-Summaries'):
            tf.summary.image('adv-input',
                             adv_x,
                             max_outputs=2,
                             family='Adversarial-Training',
                             collections=['training'])

    adv_loss = tf.nn.softmax_cross_entropy_with_logits(logits=adv_logits,
                                                       labels=y)
    adv_loss = tf.reduce_mean(adv_loss)

    return adv_loss, adv_logits
def attack_images(model, tfrecords_dirpath, attack_type='PGD', attack_kwargs=default_attack_kwargs):
    '''
    Attack images (batch = 1 for now)
    '''

    # Get the true and target labels
    true_label = attack_kwargs['y']
    attack_label = attack_kwargs['y_target']
    del attack_kwargs['y']

    # Define tfrecords input iterator
    tfrecord_filepaths = glob(os.path.join(tfrecords_dirpath, '*'))
    tf_dataset = tfutils.make_dataset(
                    tfrecord_filepaths,
                    batch_size=1,
                    filter_label=true_label,
                    preprocessing_fn=preprocess_input
                )
    iterator = tf_dataset.make_one_shot_iterator()
    x, y = iterator.get_next()

    # Run the Session
    attacked_imgs = []
    with tf.Session() as sess:

        # Set attack settings
        # PGD
        if attack_type == "PGD":
            attack = ProjectedGradientDescent(model, sess=sess)
        # FGM
        elif attack_type == "FGM":
            attack = FastGradientMethod(model, sess=sess)
        else:
            raise ValueError('Unsupported attack_type: {}'.format(attack_type))
        target_one_hot_encoded = get_one_hot_encoded_targets(attack_label)
        attack_kwargs['y_target'] = target_one_hot_encoded

        # Run the session to generate attacked images
        x_adv = attack.generate(x, **attack_kwargs)
        pbar = tqdm(unit='imgs')
        try:
            while True:
                attacked_img = sess.run(x_adv)
                predicted_class = get_predictions(model, attacked_img)
                print(predicted_class, attack_label)
                if predicted_class == attack_label:
                    attacked_imgs.append(attacked_img)
                pbar.update()
        except tf.errors.OutOfRangeError:
            pass

    if len(attacked_imgs) > 0:
        attacked_imgs = np.vstack(attacked_imgs)
    return attacked_imgs
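
The default_attack_kwargs referenced in the signature above is not shown in this excerpt; a plausible sketch of its shape, with illustrative values only, is:

# Hypothetical example of the kwargs dict consumed by attack_images(): the
# function reads 'y' and 'y_target' as class indices and forwards the rest
# to attack.generate(). All values here are assumptions.
default_attack_kwargs = {
    'y': 207,            # true class index used to filter the tfrecords
    'y_target': 281,     # target class index for the targeted attack
    'eps': 8 / 255.,
    'eps_iter': 2 / 255.,
    'nb_iter': 40,
    'clip_min': 0.,
    'clip_max': 1.,
}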
Example #5
def pgd_attack():
    # Use tf for evaluation on adversarial data
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        3,
        32,
        32,
    ))
    y_op = tf.placeholder(tf.float32, shape=(None, 10))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create a PGD attack
    pgd = ProjectedGradientDescent(cleverhans_model, sess=sess)
    pgd_params = {
        'eps': args.eps,
        'eps_iter': args.ss,
        'nb_iter': args.ns,
        'clip_min': 0.,
        'clip_max': 1.,
        'y': y_op
    }

    adv_x_op = pgd.generate(x_op, **pgd_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Evaluation against PGD attacks
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        adv_preds = sess.run(adv_preds_op,
                             feed_dict={
                                 x_op: inputs,
                                 y_op:
                                 torch.nn.functional.one_hot(targets, 10)
                             })
        correct += (np.argmax(adv_preds, axis=1) == targets.numpy()).sum()
        total += len(inputs)

        sys.stdout.write("\rWhite-box PGD attack... Acc: %.3f%% (%d/%d)" %
                         (100. * correct / total, correct, total))
        sys.stdout.flush()

    print('Accuracy under PGD attack: %.3f%%' % (100. * correct / total))
    def _get_pert(self, X, Y, eps):
        if eps == 0:
            return np.zeros_like(X)
        with self.sess.as_default():
            self.x = self.wrap.input

            pgd = ProjectedGradientDescent(self.wrap, sess=self.sess)
            adv_x = pgd.generate(self.x,
                                 y=self.y,
                                 eps=eps,
                                 ord=self.ord,
                                 eps_iter=0.01)
            adv_x = tf.stop_gradient(adv_x)
            pert_x = adv_x - self.x

            feed_dict = {self.x: X, self.y: Y}
            ret = pert_x.eval(feed_dict=feed_dict)
        return ret
Example #7
def evaluate_checkpoint(filename):
    if attack_method == 'BIM':
        bim = BasicIterativeMethod(model)
        bim_params = {
            'eps': 0.3,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 50,
            'eps_iter': .01
        }
        adv_x = bim.generate(x_image, **bim_params)
    elif attack_method == 'FGM':
        FGM_attack = FastGradientMethod(model)
        FGM_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_x = FGM_attack.generate(x_image, **FGM_params)
    elif attack_method == 'PGD':
        pgd = ProjectedGradientDescent(model)
        pgd_params = {
            'eps': 0.09,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 40,
            'eps_iter': .01
        }
        adv_x = pgd.generate(x_image, **pgd_params)
    preds_adv = model.get_probs(adv_x)

    with tf.Session() as sess:
        # Restore the checkpoint
        saver = tf.train.Saver(var_list=model.all_variables)
        saver.restore(sess, filename)

        eval_par = {'batch_size': batch_size}
        t1 = time.time()
        acc = model_eval(sess,
                         x_image,
                         y,
                         preds_adv,
                         X_test,
                         Y_test,
                         args=eval_par)
        t2 = time.time()
        print("Took", t2 - t1, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
Example #8
def get_alp_loss(sess, x, y, logits, adv_logits, model, eps, eps_iter,
                 iterations):
    if adv_logits is None:
        pgd_params = {
            'ord': np.inf,
            'y': y,
            'eps': eps / 255,
            'eps_iter': eps_iter / 255,
            'nb_iter': iterations,
            'rand_init': True,
            'rand_minmax': eps / 255,
            'clip_min': 0.,
            'clip_max': 1.,
            'sanity_checks': True
        }

        pgd = ProjectedGradientDescent(model, sess=sess)
        adv_x = pgd.generate(x, **pgd_params)
        adv_logits = model.get_logits(adv_x)

    adv_pairing_loss = tf.losses.mean_squared_error(logits, adv_logits)

    return adv_pairing_loss
Example #9
def save_pgd_attacked_images(original_class,
                             target_class,
                             attack_strength,
                             nb_iter=50,
                             seed=1000):

    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    eps = attack_strength
    labels_by_name = load_labels_by_name()
    target_label = labels_by_name[target_class].lucid_label

    benign_dataset_path = DataPaths.get_benign_images_datapath(original_class)
    assert benign_dataset_path.exists()

    attacked_dataset_path = DataPaths.get_attacked_images_datapath(
        original_class, target_class, attack_name='pgd', attack_strength=eps)
    assert not attacked_dataset_path.exists()
    print('Saving attacked images to %s' % attacked_dataset_path)

    img_dataset = hdf5utils.load_image_dataset_from_file(benign_dataset_path)

    output_file = h5py.File(attacked_dataset_path, 'w')
    out_dataset = hdf5utils.create_image_dataset(output_file,
                                                 dataset_name='images')
    indices_dataset = hdf5utils.create_dataset(output_file,
                                               data_shape=(1, ),
                                               dataset_name='indices')

    graph = tf.Graph()
    with graph.as_default():
        model = InceptionV1Model()
        x = model.default_input_placeholder
        y_pred = model.get_predicted_class(x)
        with tf.Session(graph=graph) as sess:
            attack = ProjectedGradientDescent(model, sess=sess)
            target_one_hot_encoded = get_one_hot_encoded_targets(target_label)

            x_adv = attack.generate(x,
                                    eps=eps,
                                    nb_iter=nb_iter,
                                    clip_min=-1,
                                    clip_max=1,
                                    eps_iter=(eps / 5),
                                    ord=2,
                                    y_target=target_one_hot_encoded)

            num_attack_success = 0
            pbar = tqdm(unit='imgs', total=len(img_dataset))
            try:
                for i, img in enumerate(img_dataset):
                    ben_img = np.array(img)
                    adv_img = sess.run(x_adv, feed_dict={x: [ben_img]})
                    attack_pred = sess.run(y_pred, feed_dict={x: adv_img})

                    adv_img = adv_img[0]
                    attack_pred = attack_pred[0]
                    assert not np.any(np.isnan(adv_img))
                    assert not np.isnan(attack_pred)

                    if attack_pred == target_label:
                        index = np.array([i])
                        num_attack_success += 1
                        hdf5utils.add_image_to_dataset(adv_img, out_dataset)
                        hdf5utils.add_item_to_dataset(index, indices_dataset)

                    pbar.set_postfix(num_attack_success=num_attack_success)
                    pbar.update()
            except tf.errors.OutOfRangeError:
                pass
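
A hypothetical call to the helper above; the class names and L2 budget are illustrative assumptions:

# Hedged usage sketch: the class names must exist in load_labels_by_name(),
# and attack_strength is the L2 eps since the attack above uses ord=2.
save_pgd_attacked_images(original_class='golden_retriever',
                         target_class='tabby',
                         attack_strength=2.0,
                         nb_iter=50,
                         seed=1000)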
Example #10
tf_model_fn = convert_pytorch_model_to_tf(clf)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

# pgd_op = MadryEtAl(cleverhans_model, sess=sess)
pgd_op = ProjectedGradientDescent(cleverhans_model,
                                  sess=sess,
                                  default_rand_init=True)
pgd_params = {
    'eps': 16 / 255.0,
    'eps_iter': 2 / 255.0,
    'nb_iter': 10,
    'clip_min': 0.0,
    'clip_max': 1.0
}
adv_x_op = pgd_op.generate(x_op, y=onehot_op, **pgd_params)

clean_logits_op = tf_model_fn(x_op)
adv_logits_op = tf_model_fn(adv_x_op)

cudnn.benchmark = True

total = -1
for step, (images, labels) in enumerate(train_loader):
    print('To be attacked: {}th, {}'.format(
        total + 1, os.path.basename(train_set.imgs[total + 1][0])))
    start = time.time()

    images = images.to(device)
    labels = labels.to(device)
Example #11
def train(ARGS):
    # Define helper function for evaluating on test data during training
    def eval(epoch):
        from train_utils import clean_eval
        test_accuracy, test_loss, _ = clean_eval(sess, x, y, is_training,
                                                 testloader, n_classes, logits,
                                                 preds)
        # Write tensorboard summary
        acc_summary = tf.Summary()
        acc_summary.value.add(tag='Evaluation/accuracy/test',
                              simple_value=test_accuracy)
        writer_test.add_summary(acc_summary, epoch)

        # Write tensorboard summary
        err_summary = tf.Summary()
        err_summary.value.add(tag='Evaluation/error/test',
                              simple_value=1.0 - test_accuracy)
        writer_test.add_summary(err_summary, epoch)

        # Write tensorboard summary
        loss_summary = tf.Summary()
        loss_summary.value.add(tag='Evaluation/loss/test',
                               simple_value=test_loss)
        writer_test.add_summary(loss_summary, epoch)

    # Define helper function for evaluating on adversarial test data during training
    def adv_eval(epoch):
        from train_utils import adversarial_eval
        adv_accuracy, adv_loss = adversarial_eval(sess,
                                                  x,
                                                  y,
                                                  is_training,
                                                  adv_testloader,
                                                  n_classes,
                                                  preds,
                                                  adv_preds,
                                                  eval_all=True)

        # Write tensorboard summary
        acc_summary = tf.Summary()
        acc_summary.value.add(tag='Evaluation/adversarial-accuracy/test',
                              simple_value=adv_accuracy)
        writer_test.add_summary(acc_summary, epoch)

        # Write tensorboard summary
        err_summary = tf.Summary()
        err_summary.value.add(tag='Evaluation/adversarial-error/test',
                              simple_value=1.0 - adv_accuracy)
        writer_test.add_summary(err_summary, epoch)

        # Write tensorboard summary
        loss_summary = tf.Summary()
        loss_summary.value.add(tag='Evaluation/adversarial-loss/test',
                               simple_value=adv_loss)
        writer_test.add_summary(loss_summary, epoch)

    # Define computational graph
    with tf.Graph().as_default() as g:
        # Define placeholders
        with tf.device('/gpu:0'):
            with tf.name_scope('Placeholders'):
                x = tf.placeholder(dtype=tf.float32,
                                   shape=input_shape,
                                   name='inputs')
                x_pair1 = tf.placeholder(dtype=tf.float32,
                                         shape=input_shape,
                                         name='x-pair1')
                x_pair2 = tf.placeholder(dtype=tf.float32,
                                         shape=input_shape,
                                         name='x-pair2')
                y = tf.placeholder(dtype=tf.float32,
                                   shape=(None, n_classes),
                                   name='labels')
                is_training = tf.placeholder_with_default(True,
                                                          shape=(),
                                                          name='is-training')

        # Define TF session
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(graph=g, config=config)

        # Define model
        with tf.name_scope('Model'):
            with tf.device('/gpu:0'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

                # Define forward-pass
                with tf.name_scope('Logits'):
                    logits = model.get_logits(x)
                with tf.name_scope('Probs'):
                    preds = tf.nn.softmax(logits)

                with tf.name_scope('Accuracy'):
                    ground_truth = tf.argmax(y, axis=1)
                    predicted_label = tf.argmax(preds, axis=1)
                    correct_prediction = tf.equal(predicted_label,
                                                  ground_truth)
                    acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                         name='accuracy')
                    tf.add_to_collection('accuracies', acc)

                    err = tf.identity(1.0 - acc, name='error')
                    tf.add_to_collection('accuracies', err)

                # Define losses
                with tf.name_scope('Losses'):
                    ce_loss, wd_loss, clp_loss, lsq_loss, at_loss, alp_loss = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
                    adv_logits = None

                    if ARGS.ct:
                        with tf.name_scope('Cross-Entropy-Loss'):
                            ce_loss = tf.reduce_mean(
                                tf.nn.softmax_cross_entropy_with_logits(
                                    logits=logits, labels=y),
                                name='cross-entropy-loss')

                            tf.add_to_collection('losses', ce_loss)

                    if ARGS.at:
                        with tf.name_scope('Adversarial-Cross-Entropy-Loss'):
                            at_loss, adv_logits = get_at_loss(
                                sess, x, y, model, ARGS.eps, ARGS.eps_iter,
                                ARGS.nb_iter)
                            at_loss = tf.identity(at_loss, name='at-loss')
                            tf.add_to_collection('losses', at_loss)

                    with tf.name_scope('Regularizers'):
                        if ARGS.wd:
                            with tf.name_scope('Weight-Decay'):
                                for var in tf.trainable_variables():
                                    if 'beta' in var.op.name:
                                        # Do not regularize bias of batch normalization
                                        continue
                                    # print('regularizing: ', var.op.name)
                                    wd_loss += tf.nn.l2_loss(var)

                                reg_loss = tf.identity(wd_loss, name='wd-loss')
                                tf.add_to_collection('losses', reg_loss)

                        if ARGS.alp:
                            with tf.name_scope('Adversarial-Logit-Pairing'):
                                alp_loss = get_alp_loss(
                                    sess, x, y, logits, adv_logits, model,
                                    ARGS.eps, ARGS.eps_iter, ARGS.nb_iter)

                                alp_loss = tf.identity(alp_loss,
                                                       name='alp-loss')
                                tf.add_to_collection('losses', alp_loss)

                        if ARGS.clp:
                            with tf.name_scope('Clean-Logit-Pairing'):
                                clp_loss = get_clp_loss(
                                    x_pair1, x_pair2, model)
                                clp_loss = tf.identity(clp_loss,
                                                       name='clp-loss')
                                tf.add_to_collection('losses', clp_loss)

                        if ARGS.lsq:
                            with tf.name_scope('Logit-Squeezing'):
                                lsq_loss = get_lsq_loss(x, model)
                                lsq_loss = tf.identity(lsq_loss,
                                                       name='lsq-loss')
                                tf.add_to_collection('losses', lsq_loss)

                    with tf.name_scope('Total-Loss'):
                        # Define objective function
                        total_loss = (ARGS.ct_lambda * ce_loss) + (
                            ARGS.at_lambda *
                            at_loss) + (ARGS.wd_lambda * wd_loss) + (
                                ARGS.clp_lambda *
                                clp_loss) + (ARGS.lsq_lambda * lsq_loss) + (
                                    ARGS.alp_lambda * alp_loss)

                        total_loss = tf.identity(total_loss, name='total-loss')
                        tf.add_to_collection('losses', total_loss)

                # Define PGD adversary
                with tf.name_scope('PGD-Attacker'):
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': True,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }

                    pgd = ProjectedGradientDescent(model, sess=sess)
                    adv_x = pgd.generate(x, **pgd_params)

                    with tf.name_scope('Logits'):
                        adv_logits = model.get_logits(adv_x)
                    with tf.name_scope('Probs'):
                        adv_preds = tf.nn.softmax(adv_logits)

        # Define optimizer
        with tf.device('/gpu:0'):
            with tf.name_scope('Optimizer'):
                # Define global step variable
                global_step = tf.get_variable(
                    name='global_step',
                    shape=[],  # scalar
                    dtype=tf.float32,
                    initializer=tf.zeros_initializer(),
                    trainable=False)

                optimizer = tf.train.AdamOptimizer(learning_rate=ARGS.lr,
                                                   beta1=0.9,
                                                   beta2=0.999,
                                                   epsilon=1e-6,
                                                   use_locking=False,
                                                   name='Adam')
                trainable_vars = tf.trainable_variables()

                update_bn_ops = tf.get_collection(
                    tf.GraphKeys.UPDATE_OPS
                )  # this collection stores the moving_mean and moving_variance ops
                #  for batch normalization
                with tf.control_dependencies(update_bn_ops):
                    grads_and_vars = optimizer.compute_gradients(
                        total_loss, trainable_vars)
                    train_step = optimizer.apply_gradients(
                        grads_and_vars, global_step=global_step)

        # Add Tensorboard summaries
        with tf.device('/gpu:0'):
            # Create file writers
            writer_train = tf.summary.FileWriter(ARGS.log_dir + '/train',
                                                 graph=g)
            writer_test = tf.summary.FileWriter(ARGS.log_dir + '/test')

            # Add summary for input images
            with tf.name_scope('Image-Summaries'):
                # Create image summary ops
                tf.summary.image('input',
                                 x,
                                 max_outputs=2,
                                 collections=['training'])

            # Add summaries for the training losses
            losses = tf.get_collection('losses')
            for entry in losses:
                tf.summary.scalar(entry.name, entry, collections=['training'])

            # Add summaries for the training accuracies
            accs = tf.get_collection('accuracies')
            for entry in accs:
                tf.summary.scalar(entry.name, entry, collections=['training'])

            # Add summaries for all trainable vars
            for var in trainable_vars:
                tf.summary.histogram(var.op.name,
                                     var,
                                     collections=['training'])
                var_norm = tf.norm(var, ord='euclidean')
                tf.summary.scalar(var.op.name + '/l2norm',
                                  var_norm,
                                  collections=['training'])

            # Add summaries for variable gradients
            for grad, var in grads_and_vars:
                if grad is not None:
                    tf.summary.histogram(var.op.name + '/gradients',
                                         grad,
                                         collections=['training'])
                    grad_norm = tf.norm(grad, ord='euclidean')
                    tf.summary.scalar(var.op.name + '/gradients/l2norm',
                                      grad_norm,
                                      collections=['training'])

            # Add summaries for the logits and model predictions
            with tf.name_scope('Logits-Summaries'):
                variable_summaries(tf.identity(logits, name='logits'),
                                   name='logits',
                                   collections=['training', 'test'],
                                   histo=True)
            with tf.name_scope('Predictions-Summaries'):
                variable_summaries(tf.identity(preds, name='predictions'),
                                   name='predictions',
                                   collections=['training', 'test'],
                                   histo=True)

        # Initialize all variables
        with sess.as_default():
            tf.global_variables_initializer().run()

        # Collect training params
        train_params = {
            'epochs': ARGS.epochs,
            'eval_step': ARGS.eval_step,
            'adv_eval_step': ARGS.adv_eval_step,
            'n_classes': n_classes,
            'clp': ARGS.clp
        }

        # Start training loop
        model_train(sess,
                    x,
                    y,
                    x_pair1,
                    x_pair2,
                    is_training,
                    trainloader,
                    train_step,
                    args=train_params,
                    evaluate=eval,
                    adv_evaluate=adv_eval,
                    writer_train=writer_train)

        # Save the trained model
        if ARGS.save:
            save_path = os.path.join(ARGS.save_dir, ARGS.filename)
            saver = tf.train.Saver(var_list=tf.global_variables())
            saver.save(sess, save_path)
            print("Saved model at {:s}".format(str(ARGS.save_dir)))
Example #12
    def attack(self, path, session):
        print_and_log(self.logfile, "")  # add a blank line
        print_and_log(self.logfile, 'Attacking model {0:}: '.format(path))
        self.model = self.init_model()
        self.model.load_state_dict(torch.load(path))
        pgd_parameters = self.pgd_params()

        class_index = 0
        context_images, target_images, context_labels, target_labels, context_images_np = None, None, None, None, None

        def model_wrapper(context_point_x):
            # Insert context_point at correct spot
            context_images_attack = torch.cat([
                context_images[0:class_index], context_point_x,
                context_images[class_index + 1:]
            ],
                                              dim=0)

            target_logits = self.model(context_images_attack, context_labels,
                                       target_images)
            return target_logits[0]

        tf_model_conv = convert_pytorch_model_to_tf(model_wrapper,
                                                    out_dims=self.args.way)
        tf_model = cleverhans.model.CallableModelWrapper(
            tf_model_conv, 'logits')
        pgd = ProjectedGradientDescent(tf_model,
                                       sess=session,
                                       dtypestr='float32')

        for item in self.test_set:

            for t in range(self.args.attack_tasks):

                task_dict = self.dataset.get_test_task(item, session)
                context_images, target_images, context_labels, target_labels, context_images_np = self.prepare_task(
                    task_dict, shuffle=False)
                # Detach shares storage with the original tensor, which isn't what we want.
                context_images_attack_all = context_images.clone()
                # Is require_grad true here, for context_images?

                for c in torch.unique(context_labels):
                    # Adversarial input context image
                    class_index = extract_class_indices(context_labels,
                                                        c)[0].item()
                    context_x = np.expand_dims(context_images_np[class_index],
                                               0)

                    # Input to the model wrapper is automatically converted to Torch tensor for us

                    x = tf.placeholder(tf.float32, shape=context_x.shape)

                    adv_x_op = pgd.generate(x, **pgd_parameters)
                    preds_adv_op = tf_model.get_logits(adv_x_op)

                    feed_dict = {x: context_x}
                    adv_x, preds_adv = session.run((adv_x_op, preds_adv_op),
                                                   feed_dict=feed_dict)

                    context_images_attack_all[class_index] = torch.from_numpy(
                        adv_x)

                    save_image(adv_x,
                               os.path.join(self.checkpoint_dir, 'adv.png'))
                    save_image(context_x,
                               os.path.join(self.checkpoint_dir, 'in.png'))

                    acc_after = torch.mean(
                        torch.eq(
                            target_labels,
                            torch.argmax(torch.from_numpy(preds_adv).to(
                                self.device),
                                         dim=-1)).float()).item()

                    with torch.no_grad():
                        logits = self.model(context_images, context_labels,
                                            target_images)
                        acc_before = torch.mean(
                            torch.eq(target_labels,
                                     torch.argmax(logits,
                                                  dim=-1)).float()).item()
                        del logits

                    diff = acc_before - acc_after
                    print_and_log(
                        self.logfile,
                        "Task = {}, Class = {} \t Diff = {}".format(
                            t, c, diff))

                print_and_log(self.logfile,
                              "Accuracy before {}".format(acc_after))
                logits = self.model(context_images_attack_all, context_labels,
                                    target_images)
                acc_all_attack = torch.mean(
                    torch.eq(target_labels,
                             torch.argmax(logits, dim=-1)).float()).item()
                print_and_log(self.logfile,
                              "Accuracy after {}".format(acc_all_attack))
Example #13
def main(argv):
    del argv  # unused

    args_keys = [
        'namespace', 'seed', 'num_images', 'batch_size', 'attack_models',
        'eval_models', 'epsilon', 'eps_iter', 'nb_iter',
        'attack_differentiable_slq', 'eval_only'
    ]

    args_dict = {
        k: v
        for k, v in FLAGS.flag_values_dict().items() if k in args_keys
    }

    logging.info('')
    for k in args_keys:
        logging.info('%-20s = %s' % (k, args_dict[k]))
    logging.info('')

    with JobbyJob(args_dict, namespace=FLAGS.namespace) as job:
        tf.set_random_seed(FLAGS.seed)

        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=1.,
                                      allow_growth=True)))

        keras.backend.set_session(sess)
        keras.backend.set_learning_phase(0)

        with tf.name_scope('TFRecordsLoader'):
            dataset = load_tfrecords_dataset(TFRECORDS_FILENAMES)
            dataset = dataset.take(FLAGS.num_images)
            dataset = dataset.batch(FLAGS.batch_size)
            iterator = dataset.make_one_shot_iterator()
            next_element = iterator.get_next()

            X, y_true = next_element

        with sess.as_default():
            attack_model_paths = [
                MODEL_NAME_TO_CKPT_PATH_MAP[m] for m in FLAGS.attack_models
            ]
            eval_model_paths = [
                MODEL_NAME_TO_CKPT_PATH_MAP[m] for m in FLAGS.eval_models
            ]

            eval_model = EvalSHIELDModel(
                load_jpeg_trained_ensemble(FLAGS.eval_models,
                                           eval_model_paths))

            perform_attack = not FLAGS.eval_only
            if perform_attack:
                attack_model = AttackSHIELDModel(
                    load_jpeg_trained_ensemble(FLAGS.attack_models,
                                               attack_model_paths),
                    attack_differentiable_slq=FLAGS.attack_differentiable_slq)

                y_target = attack_model.get_least_likely_prediction(X)
                y_target_one_hot = tf.one_hot(y_target, 1000, axis=-1)

                attack = ProjectedGradientDescent(attack_model, sess=sess)
                attack_kwargs = {
                    'y_target': y_target_one_hot,
                    'eps': FLAGS.epsilon,
                    'eps_iter': FLAGS.eps_iter,
                    'nb_iter': FLAGS.nb_iter
                }

                X_adv = attack.generate(X, **attack_kwargs)
                y_pred_shield = eval_model.get_predicted_class(X_adv)
            else:
                y_target = y_true * -1
                y_pred_shield = eval_model.get_predicted_class(X)

            writer = tf.summary.FileWriter(LOGS_DIR, sess.graph)
            writer.close()

            model_accuracy = AccuracyMeter()
            attack_success = AccuracyMeter()
            with tqdm(total=FLAGS.num_images, unit='imgs') as pbar:
                while True:
                    try:
                        y_true_np, y_target_np, y_pred_shield_np = \
                            sess.run([y_true, y_target, y_pred_shield])

                        model_accuracy.offer(y_pred_shield_np, y_true_np)
                        attack_success.offer(y_pred_shield_np, y_target_np)

                        pbar.set_postfix(
                            model_accuracy=model_accuracy.evaluate(),
                            attack_success=attack_success.evaluate())
                        pbar.update(y_true_np.shape[0])

                    except tf.errors.OutOfRangeError:
                        break

        job.update_output(model_accuracy=model_accuracy.evaluate(),
                          attack_success=attack_success.evaluate())

        logging.info('')
        logging.info('model_accuracy = %.04f' % model_accuracy.evaluate())
        logging.info('attack_success = %.04f' % attack_success.evaluate())
def PGD(torch_model, dataset, eps_list, opt, c, h, w, clip_min, clip_max):

    if opt == 'evaluate':
        acclist = []
        for eps in eps_list:
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(
                None,
                c,
                h,
                w,
            ))
            # Convert pytorch model to a tf_model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create a PGD attack
            atk_op = ProjectedGradientDescent(cleverhans_model, sess=sess)
            atk_params = {
                'eps': eps,
                'clip_min': clip_min,
                'clip_max': clip_max
            }
            adv_x_op = atk_op.generate(x_op, **atk_params)
            adv_preds_op = tf_model_fn(adv_x_op)

            # Run an evaluation of our model against PGD
            total = 0
            correct = 0
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
                correct += (np.argmax(
                    adv_preds, axis=1) == ys.cpu().detach().numpy()).sum()
                total += dataset.batch_size

            acc = float(correct) / total
            print('Adv accuracy: {:.3f}'.format(acc * 100))
            acclist.append(acc)
        return acclist

    elif opt == 'generate':
        advpacklist = []
        for eps in eps_list:
            advlist = []
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(
                None,
                c,
                h,
                w,
            ))
            # Convert pytorch model to a tf_model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create a PGD attack
            atk_op = ProjectedGradientDescent(cleverhans_model, sess=sess)
            atk_params = {
                'eps': eps,
                'clip_min': clip_min,
                'clip_max': clip_max
            }
            adv_x_op = atk_op.generate(x_op, **atk_params)

            total = 0
            # Run an evaluation of our model against PGD
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                adv = torch.from_numpy(sess.run(adv_x_op, feed_dict={x_op:
                                                                     xs}))
                if ys == np.argmax(torch_model(xs).data.cpu().numpy()):
                    pred = np.argmax(torch_model(adv).data.cpu().numpy())
                    if ys != pred:
                        print('OK')
                        total += 1
                        print(total)
                        adv = adv.numpy()
                        advlist.append(adv)
                if total == 500:
                    break
            print(len(advlist))
            advpacklist.append(advlist)
        return advpacklist
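
A minimal sketch of driving the PGD() helper above in evaluation mode, assuming a trained torch_model and a CIFAR-10-style test loader already exist (all names and eps values are illustrative):

# Hedged usage sketch for PGD(); the eps_list values are illustrative only.
eps_list = [2 / 255., 4 / 255., 8 / 255.]
acc_per_eps = PGD(torch_model, test_loader, eps_list, opt='evaluate',
                  c=3, h=32, w=32, clip_min=0.0, clip_max=1.0)
print(acc_per_eps)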
Example #15
def eval(sess, model_name, X_train, Y_train, X_test, Y_test, cnn=False, rbf=False, fgsm=False, jsma=False, df=False, bim=False):
    """ Load model saved in model_name.json and model_name_weights.h5 and 
    evaluate its accuracy on legitimate test samples and adversarial samples.
    Use cnn=True if the model is CNN based.
    """

    # open text file and output accuracy results to it
    text_file = open("mnist_results.txt", "w")

    # load saved model
    print("Load model ... ")
    '''
    json = open('models/{}.json'.format(model_name), 'r')
    model = json.read()
    json.close()
    loaded_model = model_from_json(model)
    loaded_model.load_weights("models/{}_weights.h5".format(model_name))
    '''
    if rbf:
        loaded_model = load_model("rbfmodels/{}.h5".format(model_name), custom_objects={'RBFLayer': RBFLayer})
        text_file.write('Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
    else:
        loaded_model = load_model("models/{}.h5".format(model_name))
        text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name))

    # Set placeholders
    if cnn:
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 784))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = loaded_model(x)

    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test, args={ "batch_size" : 128 })
    text_file.write('Test accuracy on legitimate test examples: {0}\n'.format(str(accuracy)))
    #print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples depending on the input parameters
    wrap = KerasModelWrapper(loaded_model)
    
    # FGSM
    if fgsm:
        fgsm = FastGradientMethod(wrap, sess=sess)
        fgsm_params = {'eps': 0.3}
        adv_x = fgsm.generate(x, **fgsm_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={ "batch_size" : 128})
        text_file.write('Test accuracy on fgsm adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on fgsm adversarial test examples: ' + str(accuracy))

    # JSMA
    if jsma:
        jsma = SaliencyMapMethod(wrap, sess=sess)
        jsma_params = {'theta': 2., 'gamma': 0.145,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
        adv_x = jsma.generate(x, **jsma_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={ "batch_size" : 128})
        text_file.write('Test accuracy on jsma adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on jsma adversarial test examples: ' + str(accuracy))

    # DeepFool
    if df:
        df = DeepFool(wrap, sess=sess)
        df_params = {'nb_candidate': 10,
                 'max_iter': 50}
        adv_x = df.generate(x, **df_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={ "batch_size" : 128})
        text_file.write('Test accuracy on df adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on df adversarial test examples: ' + str(accuracy))

    # Basic Iterative Method (run here via ProjectedGradientDescent)
    if bim:
        bim = ProjectedGradientDescent(wrap, sess=sess)
        bim_params = {'eps': 0.3}
        adv_x = bim.generate(x, **bim_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={ "batch_size" : 128})
        text_file.write('Test accuracy on bim adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on bim adversarial test examples: ' + str(accuracy))

    print('Accuracy results outputted to mnist_results.txt')
    text_file.close()

    # Close TF session
    sess.close()
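
A hypothetical call to eval() above; the session setup and data loading are assumptions, and model_name must match a saved models/<name>.h5 file:

# Hedged usage sketch: assumes MNIST data is already loaded and shaped to
# match the placeholders created inside eval().
sess = tf.Session()
keras.backend.set_session(sess)
eval(sess, 'mnist_cnn', X_train, Y_train, X_test, Y_test,
     cnn=True, fgsm=True, jsma=True, df=True, bim=True)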
Example #16
        clip_max = (1.0 + 1E-6 - min(mean)) / std[0]
        params = {'eps': eps,
                  'clip_min': clip_min,
                  'clip_max': clip_max,
                  'eps_iter': 0.005,
                  'nb_iter': 100,
                  'rand_init': False}
    elif args.attack == 'FGSM':
        attk = FastGradientMethod(ch_model, sess=sess)
        clip_min = (0.0 - 1E-6 - max(mean)) / std[0]
        clip_max = (1.0 + 1E-6 - min(mean)) / std[0]
        params = {'eps': eps,
                  'clip_min': clip_min,
                  'clip_max': clip_max}

    adv_x = attk.generate(x_op, **params)
    adv_preds_op = tf_model(adv_x)

    stime = time.time()

    tot_clean_err, tot_adv_err, tot = 0.0, 0.0, 0
    # tot_adv_input_err = 0.0

    clean_detail = list()
    detail = list()

    for i, (xs, ys) in enumerate(test_loader):
        ys = ys.numpy()

        clean_preds = model(xs.cuda()).detach().cpu().numpy()
Example #17
def main(train_method, dataset, model_name, params):
    # prepare dataset and normalize settings
    normalize = None
    if params.get('normalized', False):
        if dataset == 'mnist':
            normalize = (_MNIST_MEAN, _MNIST_STDDEV)
        elif dataset == 'cifar10':
            normalize = (_CIFAR10_MEAN, _CIFAR10_STDDEV)
        elif dataset == 'imagenet':
            normalize = (_IMAGENET_MEAN, _IMAGENET_STDDEV)
    train_set = get_dataset(dataset, 'train', normalize)
    test_set = get_dataset(dataset, 'test', normalize)

    # read input shape (c, h, w)
    input_shape = get_input_shape(dataset)

    # read params
    batch_size = params['batch_size']
    optimizer_name = params.get('optimizer', 'sgd')
    if optimizer_name == 'sgd':
        lr = params.get('learning_rate', 0.1)
        momentum = params.get('momentum', 0.1)
        weight_decay = params.get('weight_decay', 5e-4)
    elif optimizer_name == 'adam':
        lr = params.get('learning_rate', 0.1)
    else:
        raise NotImplementedError
    cur_lr = lr
    print('default learning rate =', cur_lr, file=stderr)
    start_epoch = 0
    epochs = params.get('epochs', 0)
    eps = normed_eps = params['eps']
    if train_method == 'adv':
        # Note: for adversarial training, the training phase uses the manual PGD
        # implementation and the test phase uses the cleverhans implementation
        eps_iter_coef = params['eps_iter_coef']
        clip_min = params['clip_min']
        clip_max = params['clip_max']
        if normalize is not None:
            mean, std = normalize
            clip_min = (clip_min - max(mean)) / min(std) - 1e-6
            clip_max = (clip_max - min(mean)) / min(std) + 1e-6
            normed_eps = eps / min(std)
        nb_iter = params['nb_iter']
        rand_init = params['rand_init']

        adv_params = {
            'eps': normed_eps,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'eps_iter': eps_iter_coef * eps,
            'nb_iter': nb_iter,
            'rand_init': rand_init
        }
    elif train_method == 'certadv':
        # Note: for certified adversarially trained models, we still test their accuracy with the PGD attack
        eps_iter_coef = params['eps_iter_coef']
        clip_min = params['clip_min']
        clip_max = params['clip_max']
        if normalize is not None:
            mean, std = normalize
            clip_min = (clip_min - max(mean)) / min(std) - 1e-6
            clip_max = (clip_max - min(mean)) / min(std) + 1e-6
            normed_eps = eps / min(std)
        nb_iter = params['nb_iter']
        rand_init = params['rand_init']

        adv_params = {
            'eps': normed_eps,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'eps_iter': eps_iter_coef * eps,
            'nb_iter': nb_iter,
            'rand_init': rand_init
        }
        print(adv_params, file=stderr)

    # prepare loader
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size,
                                               shuffle=True,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size,
                                              shuffle=True,
                                              pin_memory=True)

    # stats
    train_tot = len(train_set)
    test_tot = len(test_set)

    best_acc = 0.0
    best_robacc = 0.0

    # load model
    m = model.load_model('exp', dataset, model_name).cuda()
    print(m)

    if train_method == 'adv' and params['retrain']:
        # retrain from the best clean model
        clean_model_name = f'{dataset}_{model_name}_clean_0_best'
        new_m, stats = try_load_weight(m, clean_model_name)
        assert stats == True, "Could not load pretrained clean model."
        if isinstance(new_m[0], NormalizeLayer):
            # squeeze the normalize layer out
            new_m = new_m[1]
        m = new_m
    elif train_method == 'certadv':
        configdir = params['configpath']
        ds_mapping = {'cifar10': 'cifar', 'mnist': 'mnist'}
        ds_multiplier = {'cifar10': 255., 'mnist': 10.}
        configfilename = f'exp_{ds_mapping[dataset]}{int(round(eps * ds_multiplier[dataset]))}.json'
        with open(os.path.join(configdir, configfilename), 'r') as f:
            real_config = json.load(f)
        epochs = real_config['training_params']['epochs']
        start_epoch = epochs - 1
        model_path = os.path.join(
            os.path.join(real_config['path_prefix'],
                         real_config['models_path']), f'{model_name}_best.pth')
        d = torch.load(model_path)
        print(f'certadv load from {model_path}', file=stderr)
        m.load_state_dict(d['state_dict'])

    # open file handler
    save_name = f'{dataset}_{model_name}_{train_method}_{eps}'
    mode = 'a'
    if os.path.exists(f'{SAVE_PATH}/{save_name}_train.log') or os.path.exists(
            f'{SAVE_PATH}/{save_name}_test.log'):
        choice = getpass.getpass(
            f'Log exists. Do you want to rewrite it? (Y/others) ')
        if choice == 'Y':
            mode = 'w'
            print('Rewrite log', file=stderr)
        else:
            mode = 'a'
    train_log = open(f'{SAVE_PATH}/{save_name}_train.log', mode)
    test_log = open(f'{SAVE_PATH}/{save_name}_test.log', mode)

    # special treatment for model G - layerwise training
    if model_name == 'G' and train_method == 'adv':
        new_last_layer = nn.Linear(1024, 10)

    # start
    for epoch in range(start_epoch, epochs):

        if epoch % LR_REDUCE == 0 and epoch > 0:
            # learning rate reduced to LR_REDUCE_RATE every LR_REDUCE epochs
            cur_lr *= LR_REDUCE_RATE
            print(f'  reduce learning rate to {cur_lr}', file=stderr)

        # special treatment for model G - layerwise training
        if model_name == 'G' and train_method == 'adv':
            new_m = list()
            tmp_cnt = 0
            for l in m:
                new_m.append(l)
                if isinstance(l, nn.Linear) and l.out_features == 1024:
                    tmp_cnt += 1
                if tmp_cnt > epoch / 5:
                    if l.out_features == 1024:
                        new_m.append(nn.ReLU())
                        new_m.append(new_last_layer)
                    break
            new_m = nn.Sequential(*new_m).cuda()
            m, new_m = new_m, m
            print(m, file=stderr)
            cur_lr = lr
            print(f'  learning rate restored to {cur_lr}', file=stderr)

        # init optimizer
        if optimizer_name == 'adam':
            opt = optim.Adam(m.parameters(), lr=cur_lr)
        elif optimizer_name == 'sgd':
            opt = optim.SGD(m.parameters(),
                            lr=cur_lr,
                            momentum=momentum,
                            weight_decay=weight_decay)
        else:
            raise Exception("Fail to create the optimizer")

        cur_idx = 0
        cur_acc = 0.0
        cur_robacc = 0.0

        batch_tot = 0
        batch_acc_tot = 0
        batch_robacc_tot = 0

        clean_ce = 0.0
        adv_ce = 0.0

        # now eps
        now_eps = normed_eps * min((epoch + 1) / EPS_WARMUP_EPOCHS, 1.0)
        # =========== Training ===========
        print(f'Epoch {epoch}: training', file=stderr)
        if train_method != 'clean':
            print(f'  Training eps={now_eps:.3f}', file=stderr)
        m.train()

        for i, (X, y) in enumerate(train_loader):

            if DEBUG and i > 10:
                break

            start_t = time.time()

            X_clean, y_clean = X.cuda(), y.cuda().long()
            clean_out = m(Variable(X_clean))
            clean_ce = nn.CrossEntropyLoss()(clean_out, Variable(y_clean))

            batch_tot = X.size(0)
            batch_acc_tot = (
                clean_out.data.max(1)[1] == y_clean).float().sum().item()

            if train_method == 'clean':
                opt.zero_grad()
                clean_ce.backward()
                opt.step()

            elif train_method == 'adv':
                X_pgd = Variable(X, requires_grad=True)
                for _ in range(nb_iter):
                    opt_pgd = optim.Adam([X_pgd], lr=1e-3)
                    opt.zero_grad()
                    loss = nn.CrossEntropyLoss()(m(X_pgd.cuda()),
                                                 Variable(y_clean))
                    loss.backward()
                    eta = now_eps * eps_iter_coef * X_pgd.grad.data.sign()
                    X_pgd = Variable(X_pgd.data + eta, requires_grad=True)
                    eta = torch.clamp(X_pgd.data - X, -now_eps, now_eps)
                    X_pgd.data = X + eta
                    X_pgd.data = torch.clamp(X_pgd.data, clip_min, clip_max)

                # print(X_pgd.data, la.norm((X_pgd.data - X).numpy().reshape(-1), np.inf), file=stderr)
                adv_out = m(Variable(X_pgd.data).cuda())
                adv_ce = nn.CrossEntropyLoss()(adv_out, Variable(y_clean))
                batch_robacc_tot = (
                    adv_out.data.max(1)[1] == y_clean).float().sum().item()

                opt.zero_grad()
                adv_ce.backward()
                opt.step()

            elif train_method == 'certadv':
                # no action to do for training
                adv_ce = torch.Tensor([0.0]).cuda()
                pass

            end_t = time.time()

            clean_ce = clean_ce.detach().cpu().item()
            if train_method != 'clean':
                adv_ce = adv_ce.detach().cpu().item()

            runtime = end_t - start_t
            cur_acc = (cur_acc * cur_idx + batch_acc_tot) / (cur_idx +
                                                             batch_tot)
            if train_method != 'clean':
                cur_robacc = (cur_robacc * cur_idx +
                              batch_robacc_tot) / (cur_idx + batch_tot)
            cur_idx += batch_tot

            print(
                f'{epoch} {cur_idx} {cur_acc} {cur_robacc} {batch_acc_tot/batch_tot:.3f} {batch_robacc_tot/batch_tot:.3f}'
                f' {clean_ce:.3f} {adv_ce:.3f} {runtime:.3f}',
                file=train_log)
            if i % STEP == 0 or cur_idx == train_tot:
                print(
                    f'  [train] {epoch}/{cur_idx} acc={cur_acc:.3f}({batch_acc_tot/batch_tot:.3f}) '
                    f'robacc={cur_robacc:.3f}({batch_robacc_tot/batch_tot:.3f}) ce={clean_ce:.3f} adv_ce={adv_ce:.3f} time={runtime:.3f}',
                    file=stderr)

        train_log.flush()

        # =========== Testing ===========
        print(f'Epoch {epoch}: testing', file=stderr)
        m.eval()
        torch.set_grad_enabled(False)

        cur_idx = 0
        cur_acc = 0.0
        cur_robacc = 0.0

        batch_tot = 0
        batch_acc_tot = 0
        batch_robacc_tot = 0

        clean_ce = 0.0
        adv_ce = 0.0

        if train_method in ['adv', 'certadv']:
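            # For test-time robustness evaluation, wrap the PyTorch model as a TF
            # callable so the CleverHans PGD attack can build its graph against it;
            # adversarial examples are then generated through this TF session per batch.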
            tf_model = convert_pytorch_model_to_tf(m)
            ch_model = CallableModelWrapper(tf_model, output_layer='logits')
            x_op = tf.placeholder(tf.float32,
                                  shape=(None, ) + tuple(input_shape))
            sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=0.5)))
            attk = ProjectedGradientDescent(ch_model, sess=sess)
            adv_x = attk.generate(x_op, **adv_params)
            adv_preds_op = tf_model(adv_x)

        for i, (X, y) in enumerate(test_loader):

            if DEBUG and i >= 10:
                break

            start_t = time.time()

            X_clean, y_clean = X.cuda(), y.cuda().long()
            clean_out = m(Variable(X_clean))
            clean_ce = nn.CrossEntropyLoss()(clean_out, Variable(y_clean))

            batch_tot = X.size(0)
            batch_acc_tot = (
                clean_out.data.max(1)[1] == y_clean).float().sum().item()

            if train_method in ['adv', 'certadv']:

                (adv_preds, ) = sess.run((adv_preds_op, ), feed_dict={x_op: X})
                adv_preds = torch.Tensor(adv_preds)

                adv_ce = nn.CrossEntropyLoss()(adv_preds, Variable(y))
                batch_robacc_tot = (
                    adv_preds.data.max(1)[1] == y).float().sum().item()

            # elif train_method == 'certadv':
            #
            #     adv_ce, robust_err = robust_loss(m, eps,
            #                                      Variable(X_clean), Variable(y_clean),
            #                                      proj=50, norm_type='l1_median', bounded_input=True)
            #
            #     batch_robacc_tot = (1.0 - robust_err) * batch_tot

            end_t = time.time()

            clean_ce = clean_ce.detach().cpu().item()
            if train_method != 'clean':
                adv_ce = adv_ce.detach().cpu().item()

            runtime = end_t - start_t
            cur_acc = (cur_acc * cur_idx + batch_acc_tot) / (cur_idx +
                                                             batch_tot)
            if train_method != 'clean':
                cur_robacc = (cur_robacc * cur_idx +
                              batch_robacc_tot) / (cur_idx + batch_tot)
            cur_idx += batch_tot

            print(
                f'{epoch} {cur_idx} {cur_acc} {cur_robacc} {batch_acc_tot / batch_tot:.3f} {batch_robacc_tot / batch_tot:.3f}'
                f' {clean_ce:.3f} {adv_ce:.3f} {runtime:.3f}',
                file=test_log)
            if i % STEP == 0 or cur_idx == train_tot:
                print(
                    f'  [test] {epoch}/{cur_idx} acc={cur_acc:.3f}({batch_acc_tot / batch_tot:.3f}) '
                    f'robacc={cur_robacc:.3f}({batch_robacc_tot / batch_tot:.3f}) time={runtime:.3f}',
                    file=stderr)

        torch.set_grad_enabled(True)

        if model_name == 'G' and train_method == 'adv':
            # switch back
            m, new_m = new_m, m

        def save_with_configs(m, path):
            torch.save(
                {
                    'state_dict': m.state_dict(),
                    'acc': cur_acc,
                    'robacc': cur_robacc,
                    'epoch': epoch,
                    'normalized': normalize is not None,
                    'dataset': dataset
                }, path)

        if not os.path.exists(f'{SAVE_PATH}/{save_name}_chkpt'):
            os.makedirs(f'{SAVE_PATH}/{save_name}_chkpt')
        save_with_configs(
            m, f'{SAVE_PATH}/{save_name}_chkpt/{save_name}_ep_{epoch:03d}.pth')
        if (train_method == 'clean'
                and cur_acc > best_acc) or (train_method != 'clean'
                                            and cur_robacc > best_robacc):
            save_with_configs(m, f'{SAVE_PATH}/{save_name}_best.pth')
            print(
                f"  Updated, acc {best_acc:.3f} => {cur_acc:.3f} robacc {best_robacc:.3f} => {cur_robacc:.3f}",
                file=stderr)
            best_acc = cur_acc
            best_robacc = cur_robacc

        test_log.flush()

        # free cached GPU memory after each epoch
        torch.cuda.empty_cache()
        if train_method in ['adv', 'certadv']:
            # close the TF session created for the test-time attack graph
            sess.close()

    train_log.close()
    test_log.close()
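The hand-rolled loop in the 'adv' branch above is standard L-infinity PGD: a signed-gradient step, projection back into the eps-ball around the clean batch, then clipping to the valid pixel range. A minimal self-contained sketch of the same update (the helper name `pgd_linf` is illustrative, assuming `model`, `X`, and `y` live on the same device):

import torch
import torch.nn as nn


def pgd_linf(model, X, y, eps, step_size, n_iter, clip_min=0.0, clip_max=1.0):
    """Illustrative L-infinity PGD, mirroring the manual loop in the trainer above."""
    X_adv = X.clone().detach()
    for _ in range(n_iter):
        X_adv.requires_grad_(True)
        loss = nn.CrossEntropyLoss()(model(X_adv), y)
        model.zero_grad()
        loss.backward()
        with torch.no_grad():
            X_adv = X_adv + step_size * X_adv.grad.sign()   # signed-gradient step
            X_adv = X + torch.clamp(X_adv - X, -eps, eps)   # project into the eps-ball
            X_adv = torch.clamp(X_adv, clip_min, clip_max)  # keep pixels in valid range
    return X_adv.detach()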
Example #18
def evaluate_trans(loader,
                   dataset,
                   model,
                   epoch,
                   epsilon,
                   ref_model,
                   clip_min=0.,
                   clip_max=1.,
                   eps_iter=0.005,
                   nb_iter=100,
                   rand_init=False,
                   verbose=20):
    batch_time = AverageMeter()
    losses = AverageMeter()
    errors = AverageMeter()

    params = {
        'eps': epsilon,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'rand_init': rand_init
    }

    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        1,
        28,
        28,
    ))

    model.eval()
    ref_model.eval()
    tf_model = convert_pytorch_model_to_tf(ref_model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    attk = ProjectedGradientDescent(cleverhans_model, sess=sess)
    adv_x_op = attk.generate(x_op, **params)

    end = time.time()
    for i, (X, y) in enumerate(loader):

        X_adv = sess.run((adv_x_op), feed_dict={x_op: X})
        X, y = Variable(torch.tensor(X_adv)).cuda(), y.cuda()
        out = model(Variable(X))
        ce = nn.CrossEntropyLoss()(out, Variable(y))
        err = (out.data.max(1)[1] != y).float().sum() / X.size(0)

        # measure accuracy and record loss
        losses.update(ce.item(), X.size(0))
        errors.update(err.item(), X.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        endline = '\n' if i % verbose == 0 else '\r'
        print('Adv test: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Error {error.val:.3f} ({error.avg:.3f})'.format(
                  i,
                  len(loader),
                  batch_time=batch_time,
                  loss=losses,
                  error=errors),
              end=endline)

        if DEBUG and i == 10:
            break

    print('\n * Error {error.avg:.3f}'.format(error=errors))
    return losses.avg, errors.avg
Example #19
def trans_train(loader,
                model,
                opt,
                epoch,
                epsilon,
                ref_model,
                clip_min=0.,
                clip_max=1.,
                eps_iter=0.005,
                nb_iter=100,
                rand_init=False,
                verbose=20):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    errors = AverageMeter()

    model.train()

    params = {
        'eps': epsilon,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'rand_init': rand_init
    }

    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        1,
        28,
        28,
    ))

    tf_model = convert_pytorch_model_to_tf(ref_model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    attk = ProjectedGradientDescent(cleverhans_model, sess=sess)
    adv_x_op = attk.generate(x_op, **params)

    end = time.time()
    for i, (X, y) in enumerate(loader):
        X_adv = sess.run((adv_x_op), feed_dict={x_op: X})

        X, y = Variable(torch.tensor(X_adv)).cuda(), y.cuda()
        data_time.update(time.time() - end)

        out = model(Variable(X))
        ce = nn.CrossEntropyLoss()(out, Variable(y))
        err = (out.data.max(1)[1] != y).float().sum() / X.size(0)

        opt.zero_grad()
        ce.backward()
        opt.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(ce.item(), X.size(0))
        errors.update(err.item(), X.size(0))

        if verbose and i % verbose == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Error {errors.val:.3f} ({errors.avg:.3f})'.format(
                      epoch,
                      i,
                      len(loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      errors=errors))

        if DEBUG and i == 10:
            break

    return losses.avg, errors.avg
Example #20
class PGDAdaptor(BasicAdaptor):
    """
        ** Attack-based, not a real verification method **
        Wraps the PGD attack; a failed attack only gives an empirical upper bound
        on the robust radius, not a certified guarantee
    """
    def __init__(self, dataset, model):
        super(PGDAdaptor, self).__init__(dataset, model)

        self.config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.5))
        self.config.gpu_options.allow_growth = True
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph, config=self.config)

        input_shape = get_input_shape(dataset)

        with self.sess.graph.as_default():
            with self.sess.as_default():
                self.tf_model = convert_pytorch_model_to_tf(self.model)
                self.ch_model = CallableModelWrapper(self.tf_model,
                                                     output_layer='logits')

                self.x_op = tf.placeholder(tf.float32,
                                           shape=(
                                               None,
                                               input_shape[0],
                                               input_shape[1],
                                               input_shape[2],
                                           ))
                self.attk = ProjectedGradientDescent(self.ch_model,
                                                     sess=self.sess)

        self.adv_preds_ops = dict()

    def verify(self, input, label, norm_type, radius):

        # only the L-infinity norm is supported
        assert norm_type == 'inf', 'PGDAdaptor only supports the L-infinity norm'

        xs = input.unsqueeze(0)
        clean_preds = self.model(xs.cuda()).detach().cpu().numpy()
        clean_pred = np.argmax(clean_preds[0])
        if clean_pred != label:
            return False
        if radius == 0:
            return True

        with self.sess.graph.as_default():
            with self.sess.as_default():
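                # Build (and cache) one attack graph per radius so repeated queries
                # at the same radius reuse the existing TF ops instead of rebuilding them.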
                if radius not in self.adv_preds_ops:
                    params = {
                        'eps': radius,
                        'clip_min': 0.0,
                        'clip_max': 1.0,
                        'eps_iter': radius / 50.0,
                        'nb_iter': 100,
                        'rand_init': False
                    }
                    adv_x = self.attk.generate(self.x_op, **params)
                    self.adv_preds_ops[radius] = self.tf_model(adv_x)
                (adv_preds, ) = self.sess.run((self.adv_preds_ops[radius], ),
                                              feed_dict={self.x_op: xs})

                adv_pred = np.argmax(adv_preds[0])
        return adv_pred == label
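Because `verify` only reports whether PGD fails at a single radius, estimating an empirical robust radius requires a search over radii. A hypothetical sketch (the helper name and the radius grid are illustrative, not part of this file); the result is only an attack-based upper bound, since a stronger attack could still succeed below it:

import numpy as np


def empirical_robust_radius(adaptor, x, label, radii=np.linspace(0.0, 0.5, 51)):
    """Largest radius in `radii` at which the wrapped PGD attack still fails."""
    best = 0.0
    for r in sorted(float(r) for r in radii):
        if not adaptor.verify(x, label, 'inf', r):
            break
        best = r
    return best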
def eval_robustness(ARGS, verbose=True):
    #############################################
    # Load pre-trained model
    #############################################

    if verbose:
        print('\n- Loading pre-trained model...')

    # Build evaluation graph
    eval_graph = tf.Graph()
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(graph=eval_graph, config=config)

    # Define input TF placeholder
    with eval_graph.as_default():
        with tf.device('/gpu:0'):
            # Define placeholders
            with tf.name_scope('Placeholders'):
                x = tf.placeholder(dtype=tf.float32,
                                   shape=input_shape,
                                   name='inputs')
                y = tf.placeholder(dtype=tf.float32,
                                   shape=(None, n_classes),
                                   name='labels')
                is_training = tf.placeholder_with_default(False,
                                                          shape=(),
                                                          name='is-training')

            # Define model
            with tf.name_scope('Model'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

            # Define forward-pass
            with tf.name_scope('Logits'):
                logits = model.get_logits(x)
            with tf.name_scope('Probs'):
                preds = tf.nn.softmax(logits)

            # Restore the pre-trained model
            with sess.as_default():
                saver = tf.train.Saver()
                saver.restore(sess, ARGS.restore_path + '/model.ckpt')

            # Define accuracy ops
            with tf.name_scope('Accuracy'):
                ground_truth = tf.argmax(y, axis=1)
                predicted_label = tf.argmax(preds, axis=1)
                correct_prediction = tf.equal(predicted_label, ground_truth)
                clean_acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                           name='accuracy')

            # Define PGD adversary
            if ARGS.attack == 'PGD':
                if verbose:
                    print('\n- Building {:s} attack graph...'.format(
                        ARGS.attack))

                with tf.name_scope('PGD-Attacker'):
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': ARGS.rand_init,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }
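                    # eps and eps_iter are specified in 8-bit pixel units, e.g.
                    # ARGS.eps = 8 corresponds to a budget of 8/255 ~= 0.031 in the
                    # [0, 1] image range enforced by clip_min/clip_max.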

                    pgd = ProjectedGradientDescent(model, sess=None)
                    adv_x = pgd.generate(x, **pgd_params)

            # Define SPSA adversary
            elif ARGS.attack == 'SPSA':
                if verbose:
                    print('\n- Building {:s} attack graph...'.format(
                        ARGS.attack))

                with tf.name_scope('SPSA-Attacker'):
                    spsa_params = {
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'nb_iter': ARGS.nb_iter,
                        'spsa_samples': ARGS.spsa_samples,
                        'spsa_iters': ARGS.spsa_iters,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'learning_rate': ARGS.spsa_lr,
                        'delta': ARGS.spsa_delta
                    }

                    spsa = SPSA(model, sess=sess)
                    adv_x = spsa.generate(x, **spsa_params)
            else:
                raise NotImplementedError

            with tf.name_scope('Logits'):
                adv_logits = model.get_logits(adv_x)
            with tf.name_scope('Probs'):
                adv_preds = tf.nn.softmax(adv_logits)

            adv_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=adv_logits, labels=y)
            adv_predicted_label = tf.argmax(adv_preds, axis=1)
            correct_prediction = tf.equal(adv_predicted_label, ground_truth)
            adv_accuracy = tf.reduce_mean(tf.to_float(correct_prediction),
                                          name='adv-accuracy')
            is_adv_example = tf.not_equal(ground_truth, adv_predicted_label)

    #############################################
    # Run evaluation
    #############################################

    if verbose:
        print('\n- Running robustness evaluation against {:s} attacker...\n'.
              format(ARGS.attack))

    if ARGS.attack == 'PGD':
        clean, adv_mean, adv_worstcase = run_pgd_eval(x,
                                                      y,
                                                      is_training,
                                                      sess,
                                                      adv_testloader,
                                                      clean_acc,
                                                      adv_accuracy,
                                                      adv_loss,
                                                      is_adv_example,
                                                      ARGS,
                                                      save_loss_dist=False,
                                                      verbose=verbose)

    elif ARGS.attack == 'SPSA':
        clean, adv_mean = run_spsa_eval(x,
                                        y,
                                        is_training,
                                        sess,
                                        adv_testloader,
                                        clean_acc,
                                        adv_accuracy,
                                        adv_loss,
                                        is_adv_example,
                                        ARGS,
                                        save_loss_dist=False,
                                        verbose=verbose)
        adv_worstcase = adv_mean
    else:
        raise NotImplementedError

    return clean, adv_mean, adv_worstcase
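`eval_robustness` reads its attack hyperparameters from an `ARGS` namespace. A hypothetical argparse sketch covering the flags referenced above (the default values are illustrative, not taken from this project):

import argparse


def parse_eval_args():
    parser = argparse.ArgumentParser(description='Robustness evaluation (illustrative sketch)')
    parser.add_argument('--restore_path', type=str, required=True)
    parser.add_argument('--attack', choices=['PGD', 'SPSA'], default='PGD')
    parser.add_argument('--eps', type=float, default=8.0)       # in 8-bit pixel units (8/255)
    parser.add_argument('--eps_iter', type=float, default=2.0)  # in 8-bit pixel units
    parser.add_argument('--nb_iter', type=int, default=10)
    parser.add_argument('--rand_init', action='store_true')
    parser.add_argument('--spsa_samples', type=int, default=128)
    parser.add_argument('--spsa_iters', type=int, default=1)
    parser.add_argument('--spsa_lr', type=float, default=0.01)
    parser.add_argument('--spsa_delta', type=float, default=0.01)
    return parser.parse_args()


# e.g. ARGS = parse_eval_args(); clean, adv_mean, adv_worst = eval_robustness(ARGS)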
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))


    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)

        preds = model.get_logits(x)  # logits tensor of shape (?, nb_classes)

        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the attack object and build the attack graph. Note that,
        # despite the `fgsm` variable name, this uses ProjectedGradientDescent
        # rather than FastGradientMethod.

        fgsm = ProjectedGradientDescent(model, sess=sess)
        start = time.time()
        adv_x = fgsm.generate(
            x, **fgsm_params
        )  # Tensor("Identity_1:0", shape=(?, 28, 28, 1), dtype=float32)

        #imagetest = np.squeeze(adv_x)
        #plt.imshow(imagetest)

        preds_adv = model.get_logits(
            adv_x
        )  # Tensor("model1_5/dense/BiasAdd:0", shape=(?, 10), dtype=float32)
        end = time.time()
        a = end - start
        print("Attack time = ")
        print(a)
        print("")

        #Tensor("Identity_1:0", shape=(?, 28, 28, 1), dtype=float32)
        #Tensor("model1_5/dense/BiasAdd:0", shape=(?, 10), dtype=float32)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        start = time.time()
        acc_result = do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval',
                             True)
        end = time.time()

        b = end - start

        print("")
        print("Inference function time = ")
        print(b)
        print("")

        values = [b, acc_result * 100, 0, 0, 0]
        x_labels = [
            'Time(s)', 'Accuracy(%)', '', 'Method2 Time(s)',
            'Method2 Accuracy(%)'
        ]
        plt.bar(x_labels, values)
        plt.show()

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print("END!")
Example #23
def main(argv):
    del argv

    if FLAGS.debug:
        logging.info('Running in debug mode!!!')

    random.seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)
    TFRECORDS_DIR = FLAGS.tfrecords_dir
    HDF5_DATA_PATH = FLAGS.hdf5_data_path

    tfrecord_filepaths = glob(os.path.join(TFRECORDS_DIR, '*'))
    tf_dataset = tfutils.make_dataset(tfrecord_filepaths,
                                      batch_size=1,
                                      filter_label=FLAGS.label,
                                      preprocessing_fn=preprocess_input)

    hdf5_dataset = None
    if not FLAGS.debug:
        hdf5_file = h5py.File(HDF5_DATA_PATH, 'a')
        hdf5_group = get_attack_group_name(O_ATTACK_NAME, FLAGS.label)
        hdf5_dataset = hdf5utils.create_image_dataset(hdf5_file,
                                                      group=hdf5_group,
                                                      attrs={
                                                          'seed': FLAGS.seed,
                                                          'eps': FLAGS.eps,
                                                          'ord': FLAGS.ord,
                                                          'eps_iter':
                                                          FLAGS.eps_iter,
                                                          'nb_iter':
                                                          FLAGS.nb_iter,
                                                          'target':
                                                          FLAGS.target
                                                      })

    model = InceptionV1Model()
    iterator = tf_dataset.make_one_shot_iterator()
    x, y = iterator.get_next()

    with tf.Session() as sess:
        attack = ProjectedGradientDescent(model, sess=sess)
        target_one_hot_encoded = get_one_hot_encoded_targets(FLAGS.target)

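        # FLAGS.ord arrives as a string: 'inf' maps to np.inf, anything else is
        # cast to an integer norm order (e.g. '2' -> 2) before reaching the attack.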
        x_adv = attack.generate(
            x,
            eps=FLAGS.eps,
            nb_iter=FLAGS.nb_iter,
            eps_iter=FLAGS.eps_iter,
            ord=(int(FLAGS.ord) if FLAGS.ord != 'inf' else np.inf),
            y_target=target_one_hot_encoded)

        pbar = tqdm(unit='imgs')
        try:
            while True:
                attacked_imgs = sess.run(x_adv)

                if not FLAGS.debug:
                    hdf5utils.add_images_to_dataset(attacked_imgs,
                                                    hdf5_dataset)

                pbar.update()
        except tf.errors.OutOfRangeError:
            pass
def test_transferability_subset(loader, attack_method, epsilon, torch_model1,
                                torch_model2, verbose, batch_size):
    batch_time = AverageMeter()
    err12s = AverageMeter()
    err21s = AverageMeter()

    end = time.time()

    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        1,
        28,
        28,
    ))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn1 = convert_pytorch_model_to_tf(torch_model1)
    tf_model_fn2 = convert_pytorch_model_to_tf(torch_model2)

    # Attack Parameters
    if attack_method == 'CW':
        params = {
            'binary_search_steps': 1,
            # 'y': None,
            'max_iterations': CW_ATTACK_ITERATIONS,
            'learning_rate': CW_LEARNING_RATE,
            'batch_size': batch_size,
            'initial_const': 10
        }
    elif attack_method == 'PGD':
        params = {
            'eps': epsilon,
            'clip_min': 0.,
            'clip_max': 1.,
            'eps_iter': 0.005,
            'nb_iter': 100,
            'rand_init': False
        }
    elif attack_method == 'FGSM':
        params = {'eps': epsilon, 'clip_min': 0., 'clip_max': 1.}
    else:
        raise Exception('Unknown attack method {}'.format(attack_method))

    # Model1 --> Model2
    cleverhans_model1 = CallableModelWrapper(tf_model_fn1,
                                             output_layer='logits')
    cleverhans_model2 = CallableModelWrapper(tf_model_fn2,
                                             output_layer='logits')

    # Create an attack
    if attack_method == 'CW':
        attk1 = CarliniWagnerL2(cleverhans_model1, sess=sess)
    if attack_method == 'PGD':
        attk1 = ProjectedGradientDescent(cleverhans_model1, sess=sess)
    if attack_method == 'FGSM':
        attk1 = FastGradientMethod(cleverhans_model1, sess=sess)

    if attack_method == 'CW':
        attk2 = CarliniWagnerL2(cleverhans_model2, sess=sess)
    if attack_method == 'PGD':
        attk2 = ProjectedGradientDescent(cleverhans_model2, sess=sess)
    if attack_method == 'FGSM':
        attk2 = FastGradientMethod(cleverhans_model2, sess=sess)

    adv_x_op1 = attk1.generate(x_op, **params)
    adv_x_op2 = attk2.generate(x_op, **params)

    # Test on model1 and model2
    adv_preds_op11 = tf_model_fn1(adv_x_op1)
    adv_preds_op12 = tf_model_fn2(adv_x_op1)
    adv_preds_op21 = tf_model_fn1(adv_x_op2)
    adv_preds_op22 = tf_model_fn2(adv_x_op2)

    for i, (xs, ys) in enumerate(loader):
        (adv_preds11, adv_preds12) = sess.run((adv_preds_op11, adv_preds_op12),
                                              feed_dict={x_op: xs})
        (adv_preds21, adv_preds22) = sess.run((adv_preds_op21, adv_preds_op22),
                                              feed_dict={x_op: xs})
        cnt11 = int((np.argmax(adv_preds11, axis=1) != ys).sum())
        cnt22 = int((np.argmax(adv_preds22, axis=1) != ys).sum())
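        # err12: among inputs whose model1-crafted attack fools model1 (cnt11),
        # the fraction that also fools model2; err21 is the symmetric 2 -> 1 rate.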
        if cnt11 > 0:
            err12 = float(
                ((np.argmax(adv_preds12, axis=1) != ys) *
                 (np.argmax(adv_preds11, axis=1) != ys)).sum()) / float(cnt11)
            err12s.update(err12, cnt11)
        if cnt22 > 0:
            err21 = float(
                ((np.argmax(adv_preds22, axis=1) != ys) *
                 (np.argmax(adv_preds21, axis=1) != ys)).sum()) / float(cnt22)
            err21s.update(err21, cnt22)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if verbose:
            endline = '\n' if i % verbose == 0 else '\r'
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'error 1->2 {err12.val:.3f} ({err12.avg:.3f})\t'
                  'error 2->1 {err21.val:.3f} ({err21.avg:.3f})\t'.format(
                      i,
                      len(loader),
                      batch_time=batch_time,
                      err12=err12s,
                      err21=err21s),
                  end=endline)

    sess.close()
    return err12s.avg, err21s.avg