Example 1
    def attack_batch(self, imgs, labs, restart_i):
        """
        Run the attack on a batch of images and labels.
        """
        def compare(x, y, is_robust):
            if not is_robust:
                # An attack is successful only when it gives a prediction
                # that is both wrong and robust (certified).
                return False

            if not isinstance(x, (float, int, np.int64)):
                x = np.copy(x)
                if self.TARGETED:
                    x[y] -= self.CONFIDENCE
                else:
                    x[y] += self.CONFIDENCE
                x = np.argmax(x)
            if self.TARGETED:
                return x == y
            else:
                return x != y

        dp_mechs = {
            'l2': 'gaussian',
            'l1': 'laplace',
        }
        if self.sensitivity_multiplier is None:
            self.sensitivity_multiplier = self.sess.run(
                self.model.pre_noise_sensitivity())

        batch_size = self.batch_size

        # Keep the original images and convert to tanh-space so the
        # optimization variable is unconstrained (the 0.999999 factor avoids
        # arctanh(+/-1) = inf).
        _imgs = imgs
        imgs = np.arctanh((_imgs - self.boxplus) / self.boxmul * 0.999999)

        # set the lower and upper bounds accordingly
        lower_bound = np.zeros(batch_size)
        CONST = np.ones(batch_size) * self.initial_const
        upper_bound = np.ones(batch_size) * 1e10

        # the best l2, score, and image attack
        o_bestl2 = [1e10] * batch_size
        o_bestscore = [-1] * batch_size
        o_bestattack = [np.zeros(imgs[0].shape)] * batch_size

        o_true_robust_min_l2 = [1e10] * batch_size
        o_true_not_robust_min_l2 = [1e10] * batch_size
        o_false_not_robust_min_l2 = [1e10] * batch_size
        o_false_robust_min_l2 = [1e10] * batch_size

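        # Outer loop: binary search over the constant CONST that balances the
        # distance term against the misclassification term of the attack loss.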
        for outer_step in range(self.BINARY_SEARCH_STEPS):
            print("Starting step {}".format(outer_step))
            print(o_bestl2)
            o_final = zip(o_true_robust_min_l2, o_true_not_robust_min_l2,
                          o_false_not_robust_min_l2, o_false_robust_min_l2)
            print(np.array(list(o_final)))
            # Completely reset Adam's internal state.
            self.sess.run(self.init)
            batch = imgs[:batch_size]
            batchlab = labs[:batch_size]

            bestl2 = [1e10] * batch_size
            bestscore = [-1] * batch_size

            # On the last binary-search step (if we run many steps), repeat
            # the search once with the largest constant.
            if self.repeat and outer_step == self.BINARY_SEARCH_STEPS - 1:
                CONST = upper_bound

            if restart_i == 1:
                # First restart, modifier is 0.
                random_start = tf.zeros(self.shape)
                random_start = self.sess.run(random_start)
            else:
                # init the modifier with a random perturbation of the original
                # image.
                random_start = tf.random_normal(self.shape)
                random_start = 0.2 * tf.nn.l2_normalize(random_start)
                random_start = self.sess.run(random_start)
                random_start = np.clip(_imgs + random_start, -.5, .5)
                random_start = np.arctanh((random_start - self.boxplus) /
                                          self.boxmul * 0.999999) - imgs

            # set the variables so that we don't have to send them over again
            self.sess.run(
                self.setup, {
                    self.assign_timg: batch,
                    self.assign_tlab: batchlab,
                    self.assign_const: CONST,
                    self.assign_modifier: random_start
                })

            prev = 1e6
            if self.model.noise_scale is not None:
                args = {self.model.noise_scale: self.noise_scale}
            else:
                args = {}

            for iteration in range(self.MAX_ITERATIONS):
                # perform the attack
                _, l, l1, l2, l2s, scores, nimg, softmax_predictions = self.sess.run(
                    [
                        self.train, self.loss, self.loss1, self.loss2,
                        self.l2dist, self.output, self.newimg, self.predictions
                    ], args)

                print("Loop {}".format(iteration), (l, l1, l2))

                if np.all(scores >= -.0001) and np.all(scores <= 1.0001):
                    if np.allclose(np.sum(scores, axis=1), 1.0, atol=1e-3):
                        raise Exception(
                            "The output of model.predict should return the "
                            "pre-softmax layer. It looks like you are "
                            "returning the probability vector (post-softmax).")

                if iteration % 10 == 0:
                    # Make many noised predictions to determine whether the
                    # attack is a success.
                    n_runs = 0
                    if self.attack_params.use_softmax:
                        predictions_form_softmax = np.zeros(
                            [self.batch_size, self.model_params.num_classes])
                        predictions_form_softmax_squared = np.zeros(
                            [self.batch_size, self.model_params.num_classes])
                        predictions_form_argmax = np.zeros(
                            [self.batch_size, self.model_params.num_classes])
                    else:
                        predictions_form_argmax = np.zeros(
                            [self.batch_size, self.model_params.num_classes])

                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    while True:
                        for i in range(self.attack_params.n_draws_attack):
                            n_runs += 1
                            for j in range(self.batch_size):
                                _i = i * batch_size + j
                                pred = argmax_predictions[_i]
                                if self.attack_params.use_softmax:
                                    predictions_form_softmax[
                                        j] += softmax_predictions[_i]
                                    predictions_form_softmax_squared[
                                        j] += np.square(
                                            softmax_predictions[_i])
                                    predictions_form_argmax[j, pred] += 1
                                else:
                                    predictions_form_argmax[j, pred] += 1

                        if n_runs >= self.attack_params.n_draws_eval:
                            break
                        else:
                            softmax_predictions = self.sess.run(
                                self.predictions, args)
                            argmax_predictions = np.argmax(softmax_predictions,
                                                           axis=1)

                    if self.attack_params.use_softmax:
                        final_predictions = predictions_form_softmax
                        is_correct = []
                        is_robust = []
                        for i in range(self.batch_size):
                            is_correct.append(
                                np.argmax(batchlab[i]) == np.argmax(
                                    final_predictions[i]))
                            robustness_form_softmax = robustness.robustness_size_softmax(
                                tot_sum=predictions_form_softmax[i],
                                sqr_sum=predictions_form_softmax_squared[i],
                                counts=predictions_form_argmax[i],
                                eta=self.model_params.
                                robustness_confidence_proba,
                                dp_attack_size=self.model_params.
                                attack_norm_bound,
                                dp_epsilon=self.model_params.dp_epsilon,
                                dp_delta=self.model_params.dp_delta,
                                dp_mechanism=dp_mechs[
                                    self.model_params.sensitivity_norm]
                            ) / self.sensitivity_multiplier
                            is_robust.append(robustness_form_softmax >= self.T)
                    else:
                        final_predictions = predictions_form_argmax
                        is_correct = []
                        is_robust = []
                        for i in range(self.batch_size):
                            is_correct.append(
                                np.argmax(batchlab[i]) == np.argmax(
                                    final_predictions[i]))
                            robustness_from_argmax = robustness.robustness_size_argmax(
                                counts=predictions_form_argmax[i],
                                eta=self.model_params.
                                robustness_confidence_proba,
                                dp_attack_size=self.model_params.
                                attack_norm_bound,
                                dp_epsilon=self.model_params.dp_epsilon,
                                dp_delta=self.model_params.dp_delta,
                                dp_mechanism=dp_mechs[
                                    self.model_params.sensitivity_norm]
                            ) / self.sensitivity_multiplier
                            is_robust.append(robustness_from_argmax >= self.T)

                    # adjust the best result found so far
                    for e, (l2, sc, ii) in enumerate(
                            zip(l2s, final_predictions, nimg)):
                        l2 = math.sqrt(l2)
                        if l2 < bestl2[e] and not is_correct[e] and is_robust[e]:
                            bestl2[e] = l2
                            bestscore[e] = np.argmax(sc)
                        if l2 < o_bestl2[e] and not is_correct[e] and is_robust[e]:
                            o_bestl2[e] = l2
                            o_bestscore[e] = np.argmax(sc)
                            o_bestattack[e] = ii
                        if (l2 < o_true_robust_min_l2[e]
                                and is_correct[e] and is_robust[e]):
                            o_true_robust_min_l2[e] = l2
                        if (l2 < o_true_not_robust_min_l2[e]
                                and is_correct[e] and not is_robust[e]):
                            o_true_not_robust_min_l2[e] = l2
                        if (l2 < o_false_not_robust_min_l2[e]
                                and not is_correct[e] and not is_robust[e]):
                            o_false_not_robust_min_l2[e] = l2
                        if (l2 < o_false_robust_min_l2[e]
                                and not is_correct[e] and is_robust[e]):
                            o_false_robust_min_l2[e] = l2

                    # Abort the search early if we are making no progress.
                    if self.ABORT_EARLY and iteration % 20 == 0:
                        if l > prev * .9999:
                            break
                        prev = l

            # adjust the constant as needed
            for e in range(batch_size):
                if bestscore[e] != -1:
                    # success, divide const by two
                    upper_bound[e] = min(upper_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                else:
                    # failure, either multiply by 10 if no solution found yet
                    #          or do binary search with the known upper bound
                    lower_bound[e] = max(lower_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                    else:
                        CONST[e] *= 10

        # return the best solution found
        o_bestl2 = np.array(o_bestl2)
        o_final = zip(o_true_robust_min_l2, o_true_not_robust_min_l2,
                      o_false_not_robust_min_l2, o_false_robust_min_l2)
        o_final = list(o_final)
        return np.array(o_final)
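
The attack above searches in tanh-space. A minimal standalone sketch of that
change of variables (names like boxmin/boxmax are illustrative and assume the
usual [-0.5, 0.5] pixel box, mirroring self.boxplus/self.boxmul):

import numpy as np

boxmin, boxmax = -0.5, 0.5
boxmul = (boxmax - boxmin) / 2.0   # scale, plays the role of self.boxmul
boxplus = (boxmin + boxmax) / 2.0  # offset, plays the role of self.boxplus

x = np.random.uniform(boxmin, boxmax, size=(3, 3))
# Encode: w is unconstrained, so gradient steps can never leave the pixel box.
w = np.arctanh((x - boxplus) / boxmul * 0.999999)
# Decode: any real-valued w maps back inside [boxmin, boxmax].
x_back = np.tanh(w) * boxmul + boxplus

assert np.allclose(x, x_back, atol=1e-4)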
Example 2
def evaluate_one(dataset,
                 model_class,
                 model_params,
                 attack_class,
                 attack_params,
                 dir_name=None,
                 compute_robustness=True,
                 dev='/cpu:0'):

    gpu = int(dev.split(":")[1]) + FLAGS.min_gpu_number
    gpu = gpu % 16  # wrap around for 16-GPU experiments
    dev = "{}:{}".format(dev.split(":")[0], gpu)

    print("Evaluating attack on dev:{}".format(dev), "\n", attack_params)
    with tf.device(dev):
        if dir_name is None:
            dir_name = FLAGS.models_dir

        model_dir = os.path.join(
            dir_name, models.params.name_from_params(model_class,
                                                     model_params))
        attack_dir = os.path.join(
            model_dir, 'attack_results',
            attacks.params.name_from_params(attack_params))

        # if results are in place, don't redo
        result_path = os.path.join(attack_dir, "eval_data.json")
        if os.path.exists(result_path):
            print("Path: {} exists -- skipping!!!".format(result_path))
            return

        if dataset is None:
            dataset = FLAGS.dataset

        tot_batch_size_atk = train_attack.max_batch_size[
            models.name_from_module(model_class)]
        tot_batch_size = max_batch_size[models.name_from_module(model_class)]
        # Some bookkeeping to maximize GPU usage given the attack's
        # requirements.
        images_per_batch_attack = min(
            attack_params.num_examples,
            attack_class.Attack.image_num_per_batch_train(
                tot_batch_size_atk, attack_params))
        images_per_batch_eval = min(
            attack_params.num_examples,
            attack_class.Attack.image_num_per_batch_eval(
                tot_batch_size, attack_params))
        batch_size = min(images_per_batch_attack, images_per_batch_eval)

        image_placeholder = tf.placeholder(tf.float32, [
            batch_size, model_params.image_size, model_params.image_size,
            model_params.n_channels
        ])
        label_placeholder = tf.placeholder(
            tf.int32, [batch_size, model_params.num_classes])

        model_params = models.params.update(model_params, 'batch_size',
                                            batch_size)
        model_params = models.params.update(model_params, 'n_draws',
                                            attack_params.n_draws_eval)

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.visible_device_list = str(dev.split(":")[-1])
        sess = tf.Session(config=config)

        # Special treatment of imagenet: load inception + autoencoder
        if 'imagenet' in model_dir and model_params.attack_norm_bound > 0.0:
            autoencoder_dir_name = os.path.join(
                model_dir,
                "autoencoder_l2_l2_s1_{}_32_32_64_10_8_5_srd1221_srd1221_srd1221"
                .format(model_params.attack_norm_bound))
            autoencoder_params = json.load(
                open(os.path.join(autoencoder_dir_name, "params.json"), "r"))
            autoencoder_params['n_draws'] = attack_params.n_draws_eval
            # hyperparams for autoencoder
            autoencoder_hps = tf.contrib.training.HParams()
            for k in autoencoder_params:
                autoencoder_hps.add_hparam(k, autoencoder_params[k])
            autoencoder_hps.batch_size = batch_size * attack_params.n_draws_attack
            autoencoder_hps.autoencoder_dir_name = autoencoder_dir_name
            from models import autoencoder_model
            autoencoder_model = autoencoder_model.Autoencoder(
                autoencoder_hps, image_placeholder, image_placeholder, "eval")
            autoencoder_model.build_graph()
            autoencoder_variables = []
            for k in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                autoencoder_variables.append(k)
            autoencoder_saver = tf.train.Saver(autoencoder_variables)
            autoencoder_summary_writer = tf.summary.FileWriter(
                autoencoder_dir_name)
            try:
                autoencoder_ckpt_state = tf.train.get_checkpoint_state(
                    autoencoder_dir_name)
            except tf.errors.OutOfRangeError as e:
                tf.logging.error('Cannot restore checkpoint: %s', e)
            print('Autoencoder: Loading checkpoint',
                  autoencoder_ckpt_state.model_checkpoint_path)
            autoencoder_saver.restore(
                sess, autoencoder_ckpt_state.model_checkpoint_path)
            # imagenet dataset loader returns images in [0, 1]
            images = 2 * (autoencoder_model.output - 0.5)
            model = model_class.Model(model_params, images, label_placeholder,
                                      'eval')
            model.build_graph()
            inception_variables = []
            for k in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                if k in autoencoder_variables and k.name != "global_step":
                    continue
                if k.name.startswith("DW-encoder") or k.name.startswith("b-encoder")\
                     or k.name.startswith("b-decoder"):
                    continue
                inception_variables.append(k)
            saver = tf.train.Saver(inception_variables)
        else:
            model = model_class.Model(model_params, image_placeholder,
                                      label_placeholder, 'eval')
            model.build_graph()
            saver = tf.train.Saver()

        with sess:
            tf.train.start_queue_runners(sess)
            coord = tf.train.Coordinator()

            summary_writer = tf.summary.FileWriter(model_dir)
            try:
                ckpt_state = tf.train.get_checkpoint_state(model_dir)
            except tf.errors.OutOfRangeError as e:
                print('Cannot restore checkpoint: ', e)
                return
            if not (ckpt_state and ckpt_state.model_checkpoint_path):
                print('\n\n\n\t *** No model to eval yet at: {}\n\n\n'.\
                        format(model_dir))
                return
            print('Loading checkpoint ', ckpt_state.model_checkpoint_path)
            saver.restore(sess, ckpt_state.model_checkpoint_path)

            ops = model.predictions  # results of the softmax layer

            clean_preds = []
            defense_preds = []
            counters = []

            if model.noise_scale is not None:
                args = {model.noise_scale: 1.0}
            else:
                args = {}
            if 'imagenet' in model_dir and model_params.attack_norm_bound > 0.0:
                args = {autoencoder_model.noise_scale: 1.0}

            data = {
                'argmax_sum': [],
                'softmax_sum': [],
                'softmax_sqr_sum': [],
                'robustness_from_argmax': [],
                'robustness_from_softmax': [],
                'adv_argmax_sum': [],
                'adv_softmax_sum': [],
                'adv_softmax_sqr_sum': [],
                'adv_robustness_from_argmax': [],
                'adv_robustness_from_softmax': [],
                'pred_truth': [],
                'adversarial_norm': [],
            }

            num_iter = int(
                math.ceil(attack_params.num_examples /
                          images_per_batch_attack))
            intra_batch_num_iter = int(
                math.ceil(images_per_batch_attack / batch_size))
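            # num_iter iterates over the attack batches saved on disk;
            # intra_batch_num_iter re-chunks each attack batch into
            # model-sized eval batches.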
            for step in range(0, num_iter):
                print("Evaluate:: Starting step {}/{}".format(
                    step + 1, num_iter))
                pred_truth = np.zeros([images_per_batch_attack], dtype=int)

                predictions = np.zeros([images_per_batch_attack], dtype=int)

                prediction_votes = np.zeros(
                    [images_per_batch_attack, model_params.num_classes])
                prediction_softmax_sum = np.zeros(
                    [images_per_batch_attack, model_params.num_classes])
                prediction_softmax_sum_sqr = np.zeros(
                    [images_per_batch_attack, model_params.num_classes])

                adv_prediction_votes = np.zeros([
                    images_per_batch_attack, attack_params.restarts,
                    model_params.num_classes
                ])
                adv_prediction_softmax_sum = np.zeros([
                    images_per_batch_attack, attack_params.restarts,
                    model_params.num_classes
                ])
                adv_prediction_softmax_sum_sqr = np.zeros([
                    images_per_batch_attack, attack_params.restarts,
                    model_params.num_classes
                ])

                adv_norm = np.zeros(
                    [images_per_batch_attack, attack_params.restarts])

                for restart in range(0, attack_params.restarts):
                    print("Evaluate:: Starting restart {}/{}".format(
                        restart + 1, attack_params.restarts))
                    # Naming is advbatch-1-r-1, advbatch-2-r-1, advbatch-1-r-2 ...
                    inputs, adv_inputs, labs, adv_labs = attacks.utils.load_batch(
                        attack_dir, step + 1, restart + 1)

                    if attack_params.attack_norm == 'l2':
                        norm_ord = 2
                    elif attack_params.attack_norm == 'l_inf':
                        norm_ord = np.inf
                    else:
                        raise ValueError("Attack norm not supported")

                    s = inputs.shape
                    adv_norm_restart = np.linalg.norm(
                            np.reshape(inputs, (s[0], -1)) -  \
                                    np.reshape(adv_inputs, (s[0], -1)),
                            ord=norm_ord,
                            axis=1
                    )
                    adv_norm[:, restart] = adv_norm_restart

                    for intra_batch_step in range(0, intra_batch_num_iter):
                        batch_i_start = intra_batch_step * batch_size
                        batch_i_end = min((intra_batch_step + 1) * batch_size,
                                          images_per_batch_attack)

                        image_batch = inputs[batch_i_start:batch_i_end]
                        adv_image_batch = adv_inputs[batch_i_start:batch_i_end]
                        label_batch = labs[batch_i_start:batch_i_end]

                        # Pad a final batch that is smaller than batch_size.
                        true_batch_size = image_batch.shape[0]
                        if true_batch_size < batch_size:
                            pad_size = batch_size - true_batch_size
                            image_batch = np.pad(image_batch, [(0, pad_size),
                                                               (0, 0), (0, 0),
                                                               (0, 0)],
                                                 'constant')
                            adv_image_batch = np.pad(adv_image_batch,
                                                     [(0, pad_size), (0, 0),
                                                      (0, 0),
                                                      (0, 0)], 'constant')
                            label_batch = np.pad(label_batch, [(0, pad_size),
                                                               (0, 0)],
                                                 'constant')

                        # Predictions on the original image: computed for the
                        # first restart only.
                        if restart == 0:
                            args[image_placeholder] = image_batch
                            args[label_placeholder] = label_batch

                            softmax = sess.run(ops, args)
                            max_softmax = np.argmax(softmax, axis=1)
                            for i in range(attack_params.n_draws_eval):
                                for j in range(true_batch_size):
                                    abs_j = batch_i_start + j

                                    pred_truth[abs_j] = np.argmax(
                                        label_batch[j])

                                    rel_i = i * batch_size + j
                                    pred = max_softmax[rel_i]
                                    prediction_votes[abs_j, pred] += 1
                                    prediction_softmax_sum[abs_j] +=  \
                                        softmax[rel_i]
                                    prediction_softmax_sum_sqr[abs_j] +=  \
                                        np.square(softmax[rel_i])

                        # Predictions on the adversarial image for current
                        # restart
                        args[image_placeholder] = adv_image_batch
                        args[label_placeholder] = label_batch

                        softmax = sess.run(ops, args)
                        max_softmax = np.argmax(softmax, axis=1)
                        for i in range(attack_params.n_draws_eval):
                            for j in range(true_batch_size):
                                abs_j = batch_i_start + j
                                rel_i = i * batch_size + j
                                pred = max_softmax[rel_i]
                                adv_prediction_votes[abs_j, restart, pred] += 1
                                adv_prediction_softmax_sum[abs_j, restart] +=  \
                                        softmax[rel_i]
                                adv_prediction_softmax_sum_sqr[abs_j, restart] \
                                        += np.square(softmax[rel_i])

                predictions = np.argmax(prediction_votes, axis=1)
                adv_predictions = np.argmax(adv_prediction_votes, axis=2)

                data['pred_truth'] += pred_truth.tolist()

                data['adversarial_norm'] += adv_norm.tolist()

                data['argmax_sum'] += prediction_votes.tolist()
                data['softmax_sum'] += prediction_softmax_sum.tolist()
                data['softmax_sqr_sum'] += prediction_softmax_sum_sqr.tolist()

                data['adv_argmax_sum'] += adv_prediction_votes.tolist()
                data['adv_softmax_sum'] += adv_prediction_softmax_sum.tolist()
                data['adv_softmax_sqr_sum'] += \
                    adv_prediction_softmax_sum_sqr.tolist()

        sensitivity_multiplier = 1.0
        try:
            with open(model_dir + "/sensitivity_multiplier.json") as f:
                sensitivity_multiplier = float(json.loads(f.read())[0])
        except Exception:
            print("Missing sensitivity multiplier; defaulting to 1.0")

        # Compute robustness and add it to the eval data.
        if compute_robustness:  # mostly to avoid errors on non-PixelDP DNNs
            dp_mechs = {
                'l2': 'gaussian',
                'l1': 'laplace',
            }
            robustness_from_argmax = [
                robustness.robustness_size_argmax(
                    counts=x,
                    eta=model_params.robustness_confidence_proba,
                    dp_attack_size=model_params.attack_norm_bound,
                    dp_epsilon=model_params.dp_epsilon,
                    dp_delta=model_params.dp_delta,
                    dp_mechanism=dp_mechs[model_params.sensitivity_norm]) /
                sensitivity_multiplier for x in data['argmax_sum']
            ]
            data['robustness_from_argmax'] = robustness_from_argmax
            robustness_from_softmax = [
                robustness.robustness_size_softmax(
                    tot_sum=data['softmax_sum'][i],
                    sqr_sum=data['softmax_sqr_sum'][i],
                    counts=data['argmax_sum'][i],
                    eta=model_params.robustness_confidence_proba,
                    dp_attack_size=model_params.attack_norm_bound,
                    dp_epsilon=model_params.dp_epsilon,
                    dp_delta=model_params.dp_delta,
                    dp_mechanism=dp_mechs[model_params.sensitivity_norm]) /
                sensitivity_multiplier for i in range(len(data['argmax_sum']))
            ]
            data['robustness_from_softmax'] = robustness_from_softmax
            adv_robustness_from_argmax = [[
                robustness.robustness_size_argmax(
                    counts=x[r],
                    eta=model_params.robustness_confidence_proba,
                    dp_attack_size=model_params.attack_norm_bound,
                    dp_epsilon=model_params.dp_epsilon,
                    dp_delta=model_params.dp_delta,
                    dp_mechanism=dp_mechs[model_params.sensitivity_norm]) /
                sensitivity_multiplier
                for r in range(0, attack_params.restarts)
            ] for x in data['adv_argmax_sum']]
            data['adv_robustness_from_argmax'] = adv_robustness_from_argmax
            adv_robustness_from_softmax = [[
                robustness.robustness_size_softmax(
                    tot_sum=data['adv_softmax_sum'][i][r],
                    sqr_sum=data['adv_softmax_sqr_sum'][i][r],
                    counts=data['adv_argmax_sum'][i][r],
                    eta=model_params.robustness_confidence_proba,
                    dp_attack_size=model_params.attack_norm_bound,
                    dp_epsilon=model_params.dp_epsilon,
                    dp_delta=model_params.dp_delta,
                    dp_mechanism=dp_mechs[model_params.sensitivity_norm]) /
                sensitivity_multiplier
                for r in range(0, attack_params.restarts)
            ] for i in range(len(data['adv_argmax_sum']))]
            data['adv_robustness_from_softmax'] = adv_robustness_from_softmax

        data['sensitivity_mult_used'] = sensitivity_multiplier

        # Log eval data
        with open(result_path, 'w') as f:
            f.write(json.dumps(data))

        return data
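
Both the clean and adversarial passes above aggregate predictions over noise
draws with the same indexing: row i * batch_size + j of the softmax output is
draw i of image j. A minimal sketch of that aggregation, as a hypothetical
standalone helper:

import numpy as np

def aggregate_draws(softmax, n_draws, batch_size, num_classes):
    votes = np.zeros([batch_size, num_classes])
    softmax_sum = np.zeros([batch_size, num_classes])
    softmax_sqr_sum = np.zeros([batch_size, num_classes])
    preds = np.argmax(softmax, axis=1)
    for i in range(n_draws):
        for j in range(batch_size):
            row = i * batch_size + j
            votes[j, preds[row]] += 1                      # argmax vote counts
            softmax_sum[j] += softmax[row]                 # first moment
            softmax_sqr_sum[j] += np.square(softmax[row])  # second moment
    return votes, softmax_sum, softmax_sqr_sum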
Example 3
def evaluate(hps,
             model,
             dataset=None,
             dir_name=None,
             rerun=False,
             compute_robustness=True,
             dev='/cpu:0'):
    """Evaluate the ResNet and log prediction counters to compute
    sensitivity."""

    # Trick to start from an arbitrary GPU number.
    gpu = int(dev.split(":")[1]) + FLAGS.min_gpu_number
    if gpu >= 16:
        gpu -= 16
    dev = "{}:{}".format(dev.split(":")[0], gpu)

    print("Evaluating model on dev:{}".format(dev))
    with tf.device(dev):
        if dir_name is None:
            dir_name = FLAGS.models_dir

        dir_name = os.path.join(dir_name,
                                models.params.name_from_params(model, hps))

        if os.path.isfile(dir_name + "/eval_data.json") and not rerun:
            print("Skip eval of:{}".format(dir_name))
            # run only new models
            return

        if dataset is None:
            dataset = FLAGS.dataset

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.visible_device_list = str(dev.split(":")[-1])
        sess = tf.Session(config=config)

        # Special treatment of imagenet: load inception + autoencoder
        if 'imagenet' in dir_name and hps.attack_norm_bound > .0:
            images, labels = datasets.build_input(dataset, FLAGS.data_path,
                                                  hps.batch_size,
                                                  hps.image_standardization,
                                                  'eval')
            autoencoder_dir_name = os.path.join(
                dir_name,
                "autoencoder_l2_l2_s1_{}_32_32_64_10_8_5_srd1221_srd1221_srd1221"
                .format(hps.attack_norm_bound))
            autoencoder_params = json.load(
                open(os.path.join(autoencoder_dir_name, "params.json"), "r"))
            autoencoder_params['n_draws'] = hps.n_draws
            # hyperparams for autoencoder
            autoencoder_hps = tf.contrib.training.HParams()
            for k in autoencoder_params:
                autoencoder_hps.add_hparam(k, autoencoder_params[k])
            autoencoder_hps.batch_size = hps.batch_size * hps.n_draws
            autoencoder_hps.autoencoder_dir_name = autoencoder_dir_name
            from models import autoencoder_model
            autoencoder_model = autoencoder_model.Autoencoder(
                autoencoder_hps, images, images, "eval")
            autoencoder_model.build_graph()
            autoencoder_variables = []
            for k in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                autoencoder_variables.append(k)
            autoencoder_saver = tf.train.Saver(autoencoder_variables)
            autoencoder_summary_writer = tf.summary.FileWriter(
                autoencoder_dir_name)
            try:
                autoencoder_ckpt_state = tf.train.get_checkpoint_state(
                    autoencoder_dir_name)
            except tf.errors.OutOfRangeError as e:
                tf.logging.error('Cannot restore checkpoint: %s', e)
            print('Autoencoder: Loading checkpoint',
                  autoencoder_ckpt_state.model_checkpoint_path)
            autoencoder_saver.restore(
                sess, autoencoder_ckpt_state.model_checkpoint_path)
            # imagenet dataset loader returns images in [0, 1]
            images = 2 * (autoencoder_model.output - 0.5)
        else:
            images, labels = datasets.build_input(dataset, FLAGS.data_path,
                                                  hps.batch_size,
                                                  hps.image_standardization,
                                                  'eval')

        tf.train.start_queue_runners(sess)
        model = model.Model(hps, images, labels, 'eval')
        model.build_graph()

        if hps.image_size == 299 and 'imagenet' in dir_name\
                and hps.attack_norm_bound > .0:
            inception_variables = []
            for k in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                if k in autoencoder_variables and k.name != "global_step":
                    continue
                if k.name.startswith("DW-encoder") or k.name.startswith("b-encoder")\
                     or k.name.startswith("b-decoder"):
                    continue
                inception_variables.append(k)

            saver = tf.train.Saver(inception_variables)
        else:
            saver = tf.train.Saver()

        summary_writer = tf.summary.FileWriter(dir_name)

        try:
            ckpt_state = tf.train.get_checkpoint_state(dir_name)
        except tf.errors.OutOfRangeError as e:
            tf.logging.error('Cannot restore checkpoint: %s', e)

        tf.logging.info('Loading checkpoint %s',
                        ckpt_state.model_checkpoint_path)
        saver.restore(sess, ckpt_state.model_checkpoint_path)

        # Make predictions on the dataset, keep the label distribution
        data = {
            'argmax_sum': [],
            'softmax_sum': [],
            'softmax_sqr_sum': [],
            'pred_truth_argmax': [],
            'pred_truth_softmax': [],
        }
        total_prediction, correct_prediction_argmax, correct_prediction_logits = 0, 0, 0
        eval_data_size = hps.eval_data_size
        eval_batch_size = hps.batch_size
        eval_batch_count = int(eval_data_size / eval_batch_size)
        for i in six.moves.range(eval_batch_count):
            if model.noise_scale is None:
                args = {}  # For Madry and inception
            else:
                args = {model.noise_scale: 1.0}
            if 'imagenet' in dir_name and hps.attack_norm_bound > .0:
                args = {autoencoder_model.noise_scale: 1.0}
            (loss, softmax_predictions, truth) = sess.run([
                model.cost,
                model.predictions,
                model.labels,
            ], args)
            print("Done: {}/{}".format(eval_batch_size * i, eval_data_size))
            truth = np.argmax(truth, axis=1)[:hps.batch_size]
            prediction_votes = np.zeros([hps.batch_size, hps.num_classes])
            softmax_sum = np.zeros([hps.batch_size, hps.num_classes])
            softmax_sqr_sum = np.zeros([hps.batch_size, hps.num_classes])

            predictions = np.argmax(softmax_predictions, axis=1)
            # Use a separate name for the draw index so it does not shadow
            # the batch counter i from the enclosing loop.
            for draw in range(hps.n_draws):
                for j in range(hps.batch_size):
                    idx = draw * hps.batch_size + j
                    prediction_votes[j, predictions[idx]] += 1
                    softmax_sum[j] += softmax_predictions[idx]
                    softmax_sqr_sum[j] += np.square(
                        softmax_predictions[idx])

            predictions = np.argmax(prediction_votes, axis=1)
            predictions_logits = np.argmax(softmax_sum, axis=1)

            data['argmax_sum'] += prediction_votes.tolist()
            data['softmax_sum'] += softmax_sum.tolist()
            data['softmax_sqr_sum'] += softmax_sqr_sum.tolist()
            data['pred_truth_argmax'] += (truth == predictions).tolist()
            data['pred_truth_softmax'] += (
                truth == predictions_logits).tolist()

            print("From argamx: {} / {}".format(np.sum(truth == predictions),
                                                len(predictions)))
            print("From logits: {} / {}".format(
                np.sum(truth == predictions_logits), len(predictions)))

            correct_prediction_argmax += np.sum(truth == predictions)
            correct_prediction_logits += np.sum(truth == predictions_logits)
            total_prediction += predictions.shape[0]

            current_precision_argmax = 1.0 * correct_prediction_argmax / total_prediction
            current_precision_logits = 1.0 * correct_prediction_logits / total_prediction
            print("Current precision from argmax: {}".format(
                current_precision_argmax))
            print("Current precision from logits: {}".format(
                current_precision_logits))
            print()

        # For Parseval networks, get the true sensitivity and use it to
        # rescale the attack bound: the noise assumes a sensitivity of 1, but
        # Parseval updates often have a sensitivity higher than 1 despite the
        # projection, so we rescale when computing robustness.
        if model.pre_noise_sensitivity() is None:
            sensitivity_multiplier = None
        else:
            sensitivity_multiplier = float(
                sess.run(model.pre_noise_sensitivity(),
                         {model.noise_scale: 1.0}))
        with open(dir_name + "/sensitivity_multiplier.json", 'w') as f:
            d = [sensitivity_multiplier]
            f.write(json.dumps(d))

        # Compute robustness and add it to the eval data.
        if compute_robustness:  # mostly to avoid errors on non-PixelDP DNNs
            dp_mechs = {
                'l2': 'gaussian',
                'l1': 'laplace',
            }
            robustness_from_argmax = [
                robustness.robustness_size_argmax(
                    counts=x,
                    eta=hps.robustness_confidence_proba,
                    dp_attack_size=hps.attack_norm_bound,
                    dp_epsilon=hps.dp_epsilon,
                    dp_delta=hps.dp_delta,
                    dp_mechanism=dp_mechs[hps.sensitivity_norm]) /
                sensitivity_multiplier for x in data['argmax_sum']
            ]
            data['robustness_from_argmax'] = robustness_from_argmax
            robustness_from_softmax = [
                robustness.robustness_size_softmax(
                    tot_sum=data['softmax_sum'][i],
                    sqr_sum=data['softmax_sqr_sum'][i],
                    counts=data['argmax_sum'][i],
                    eta=hps.robustness_confidence_proba,
                    dp_attack_size=hps.attack_norm_bound,
                    dp_epsilon=hps.dp_epsilon,
                    dp_delta=hps.dp_delta,
                    dp_mechanism=dp_mechs[hps.sensitivity_norm]) /
                sensitivity_multiplier for i in range(len(data['argmax_sum']))
            ]
            data['robustness_from_softmax'] = robustness_from_softmax

        data['sensitivity_mult_used'] = sensitivity_multiplier

        # Log eval data
        with open(dir_name + "/eval_data.json", 'w') as f:
            f.write(json.dumps(data))

        # Print and log the final precision numbers.
        precision_argmax = 1.0 * correct_prediction_argmax / total_prediction
        precision_logits = 1.0 * correct_prediction_logits / total_prediction

        precision_summ = tf.Summary()
        precision_summ.value.add(tag='Precision argmax',
                                 simple_value=precision_argmax)
        precision_summ.value.add(tag='Precision logits',
                                 simple_value=precision_logits)
        #summary_writer.add_summary(precision_summ, train_step)
        #  summary_writer.add_summary(summaries, train_step)
        tf.logging.info(
            'loss: %.3f, precision argmax: %.3f, precision logits: %.3f' %
            (loss, precision_argmax, precision_logits))
        summary_writer.flush()
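
The two precision numbers logged above come straight from the accumulated
counters. A minimal sketch of that final computation (hypothetical helper;
votes and softmax_sum are the per-image arrays built in the loop):

import numpy as np

def precisions(votes, softmax_sum, truth):
    # "argmax" precision: majority vote over the noise draws.
    pred_argmax = np.argmax(votes, axis=1)
    # "logits" precision: argmax of the summed softmax over draws.
    pred_logits = np.argmax(softmax_sum, axis=1)
    return np.mean(truth == pred_argmax), np.mean(truth == pred_logits)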