    def test_batch_size_larger_than_dataset(self):
        dataset_size = 10
        batch_size = 100
        drop_last = True

        with self.assertRaises(ValueError):
            training_utils.batches_per_epoch(dataset_size=dataset_size,
                                             batch_size=batch_size,
                                             drop_last=drop_last)
    def __init__(self):
        # Load the training set definition. It is used to determine the
        # dataset size and, when no saved MetaGraph exists, to build a new
        # training graph.
        training_set_def_path = paths.DatasetDefinitions.TRAINING
        with open(training_set_def_path, 'r') as f:
            training_set_def = json.load(f)

        self.dataset_size = len(training_set_def)
        self.batches_per_epoch = utils.batches_per_epoch(
            dataset_size=self.dataset_size,
            batch_size=config.ExperimentConfig.BATCH_SIZE_TRAINING,
            drop_last=True)

        # Create and save the MetaGraph for the training graph.
        metagraph_path = paths.MetaGraphs.TRAINING
        if os.path.exists(metagraph_path):
            logger.info('Importing existing training MetaGraph '
                        'from {}'.format(metagraph_path))
            training_graph = tf.Graph()
            with training_graph.as_default():
                tf.train.import_meta_graph(metagraph_path)
        else:
            logger.info('Creating new training MetaGraph')
            training_graph = graphs.build_training_graph(training_set_def)
            with training_graph.as_default():
                tf.train.export_meta_graph(metagraph_path)

        self._session = tf.Session(graph=training_graph)

        with training_graph.as_default():
            self._saver = tf.train.Saver(max_to_keep=1)
def run_evaluation(evaluation_engine, dataset_def_path, dataset_name=None):
    """Evaluates the best trained model on a test set.

    Args:
        evaluation_engine (:obj:`EvaluationEngine`): EvaluationEngine instance
            to run to evaluate the model on the dataset.
        dataset_def_path (str): Path to the dataset definition.
        dataset_name (str, optional): Identifier associated with the current
            dataset. Defaults to None.

    Returns:
        A tuple (<average loss>, <accuracy>).
    """
    with open(dataset_def_path, 'r') as f:
        dataset_def = json.load(f)

    num_batches = train_utils.batches_per_epoch(
        dataset_size=len(dataset_def),
        batch_size=config.ExperimentConfig.BATCH_SIZE_TEST,
        drop_last=False)

    desc = '{} Evaluating'.format(PROGRESS_BAR_PREFIX)
    if dataset_name:
        desc += ' on {}'.format(dataset_name)
    pbar = tqdm(evaluation_engine.evaluate_best_model_on_dataset(dataset_def),
                desc=desc,
                total=num_batches,
                leave=True,
                disable=DISABLE_PROGRESS_BAR)

    avg_loss, accuracy = evaluation_accumulator(pbar)
    return avg_loss, accuracy
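
# `evaluation_accumulator` is referenced above but not part of this listing.
# A minimal sketch, assuming it consumes the
# (batch_idx, loss, num_correct, batch_size) tuples yielded by the
# evaluation engine and returns the documented (<average loss>, <accuracy>)
# tuple; the project's actual helper may differ:
def evaluation_accumulator(batches):
    total_loss = 0.0
    total_correct = 0
    total_examples = 0
    for _batch_idx, loss, num_correct, batch_size in batches:
        # Weight each batch by its size so the averages stay exact even when
        # the last batch is smaller (drop_last=False at evaluation time).
        total_loss += loss * batch_size
        total_correct += num_correct
        total_examples += batch_size
    return total_loss / total_examples, total_correct / float(total_examples)
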
    def test_drop_last_success(self):
        dataset_size = 100
        batch_size = 9
        drop_last = True
        expected_num_batches = 11

        num_batches = training_utils.batches_per_epoch(
            dataset_size=dataset_size,
            batch_size=batch_size,
            drop_last=drop_last)

        self.assertEqual(num_batches, expected_num_batches)
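
# The two tests above pin down the contract of `batches_per_epoch`, which is
# not itself included in this listing. A minimal sketch consistent with those
# tests (floor division when drop_last=True, ceiling division otherwise, and
# a ValueError when drop_last=True would produce zero batches); the real
# implementation may differ:
def batches_per_epoch(dataset_size, batch_size, drop_last):
    if drop_last:
        # Incomplete trailing batches are dropped, so floor-divide.
        num_batches = dataset_size // batch_size
        if num_batches == 0:
            raise ValueError(
                'dataset_size ({}) is smaller than batch_size ({}) and '
                'drop_last is True, so no full batch can be formed'.format(
                    dataset_size, batch_size))
        return num_batches
    # Ceiling division without floats: the last, possibly smaller batch
    # still counts.
    return (dataset_size + batch_size - 1) // batch_size
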
    def evaluate_best_model_on_dataset(self, dataset_def):
        dataset_size = len(dataset_def)
        batches_per_epoch = utils.batches_per_epoch(
            dataset_size=dataset_size,
            batch_size=config.ExperimentConfig.BATCH_SIZE_TEST,
            drop_last=False)

        best_ckpt_prefix = paths.Checkpoints.BEST_MODEL
        best_ckpt_pattern = best_ckpt_prefix + '.*'
        if not glob.glob(best_ckpt_pattern):
            raise IOError('No checkpoint at {}'.format(best_ckpt_prefix))

        # Load the best trained weights.
        self._saver.restore(self._session, best_ckpt_prefix)

        # Fetch the input nodes. The iterator handle placeholder lets the
        # session pull batches from whichever feedable iterator's string
        # handle is fed through feed_dict.
        handle_ph = self._session.graph.get_tensor_by_name(
            naming.Names.ITERATOR_HANDLE + ':0')

        # Fetch the output nodes.
        loss = self._session.graph.get_tensor_by_name(
            naming.Names.EVALUATION_LOSS + ':0')
        (_probabilities, _prediction, num_correct_predictions,
         batch_size) = self._session.graph.get_collection(
             naming.Names.OUTPUT_COLLECTION)

        with self._session.graph.as_default():
            # Load the dataset and get the handle.
            # The dataset must be built "within" the current graph to be used.
            dataset = tf_ds.evaluation_set_from_dataset_definition(dataset_def)
            iterator = dataset.make_one_shot_iterator()
            handle = self._session.run(iterator.string_handle())

            for batch_idx in count():
                if batch_idx >= batches_per_epoch:
                    # batch_idx is zero-based, so reaching batches_per_epoch
                    # means the epoch has already overrun.
                    logger.warning('Batch index is {} but an epoch should '
                                   'only contain '
                                   '{} batches'.format(batch_idx,
                                                       batches_per_epoch))
                try:
                    (loss_out, num_correct_out,
                     batch_size_out) = self._session.run(
                         [
                             loss,
                             num_correct_predictions,
                             batch_size,
                         ],
                         feed_dict={handle_ph: handle})
                except tf.errors.OutOfRangeError:
                    break

                yield batch_idx, loss_out, num_correct_out, batch_size_out