def test_batch_size_larger_than_dataset(self):
    """A batch larger than the dataset with ``drop_last`` must be rejected.

    The call is expected to raise ``ValueError``.
    """
    call_kwargs = {
        'dataset_size': 10,
        'batch_size': 100,
        'drop_last': True,
    }
    with self.assertRaises(ValueError):
        training_utils.batches_per_epoch(**call_kwargs)
def __init__(self):
    """Prepare the training graph, its session and a checkpoint saver.

    Reads the training set definition to derive ``dataset_size`` and
    ``batches_per_epoch``. The training MetaGraph is imported from disk
    when a file already exists at the configured path; otherwise a new
    graph is built from the training set definition and exported there.
    """
    # The training set definition provides the dataset size and is also
    # the input needed if a new training graph has to be built.
    with open(paths.DatasetDefinitions.TRAINING, 'r') as definition_file:
        training_set_def = json.load(definition_file)

    self.dataset_size = len(training_set_def)
    self.batches_per_epoch = utils.batches_per_epoch(
        dataset_size=self.dataset_size,
        batch_size=config.ExperimentConfig.BATCH_SIZE_TRAINING,
        drop_last=True)

    # Reuse the MetaGraph found on disk, or build one and save it.
    metagraph_path = paths.MetaGraphs.TRAINING
    if not os.path.exists(metagraph_path):
        logger.info('Creating new training MetaGraph')
        graph = graphs.build_training_graph(training_set_def)
        with graph.as_default():
            tf.train.export_meta_graph(metagraph_path)
    else:
        logger.info(
            'Importing existing training MetaGraph from {}'.format(
                metagraph_path))
        graph = tf.Graph()
        with graph.as_default():
            tf.train.import_meta_graph(metagraph_path)

    self._session = tf.Session(graph=graph)
    # The saver is created with the training graph as the default graph;
    # only the most recent checkpoint is kept.
    with graph.as_default():
        self._saver = tf.train.Saver(max_to_keep=1)
def run_evaluation(evaluation_engine, dataset_def_path, dataset_name=None):
    """Run the best trained model over a dataset behind a progress bar.

    Args:
        evaluation_engine (:obj:`EvaluationEngine`): Engine whose
            ``evaluate_best_model_on_dataset`` produces the per-batch
            results.
        dataset_def_path (str): Path to the JSON dataset definition.
        dataset_name (str, optional): Label appended to the progress bar
            description when truthy. Defaults to None.

    Returns:
        A tuple (<average loss>, <accuracy>), as computed by
        ``evaluation_accumulator`` over the per-batch results.
    """
    with open(dataset_def_path, 'r') as definition_file:
        dataset_def = json.load(definition_file)

    # Only used to size the progress bar; the final partial batch counts.
    expected_batches = train_utils.batches_per_epoch(
        dataset_size=len(dataset_def),
        batch_size=config.ExperimentConfig.BATCH_SIZE_TEST,
        drop_last=False)

    label_parts = ['{} Evaluating'.format(PROGRESS_BAR_PREFIX)]
    if dataset_name:
        label_parts.append(' on {}'.format(dataset_name))

    batch_results = evaluation_engine.evaluate_best_model_on_dataset(
        dataset_def)
    progress = tqdm(batch_results,
                    desc=''.join(label_parts),
                    total=expected_batches,
                    leave=True,
                    disable=DISABLE_PROGRESS_BAR)

    avg_loss, accuracy = evaluation_accumulator(progress)
    return avg_loss, accuracy
def test_drop_last_success(self):
    """With ``drop_last``, 100 samples in batches of 9 give 11 batches."""
    actual = training_utils.batches_per_epoch(
        dataset_size=100, batch_size=9, drop_last=True)
    self.assertEqual(actual, 11)
def evaluate_best_model_on_dataset(self, dataset_def):
    """Yield per-batch evaluation results for the best checkpoint.

    Restores the best checkpoint into the session, builds an evaluation
    dataset from ``dataset_def`` inside the session's graph and fetches
    results batch by batch until the iterator is exhausted.

    This is a generator: nothing (including the checkpoint check) runs
    until the first item is requested.

    Args:
        dataset_def: Dataset definition. Its length is taken as the
            dataset size and it is passed to
            ``tf_ds.evaluation_set_from_dataset_definition``.

    Yields:
        A tuple ``(batch_idx, loss, num_correct_predictions, batch_size)``
        holding the 0-based batch index and the values fetched for it.

    Raises:
        IOError: If no file matches the best-checkpoint prefix.
    """
    batches_per_epoch = utils.batches_per_epoch(
        dataset_size=len(dataset_def),
        batch_size=config.ExperimentConfig.BATCH_SIZE_TEST,
        drop_last=False)

    best_ckpt_prefix = paths.Checkpoints.BEST_MODEL
    if not glob.glob(best_ckpt_prefix + '.*'):
        raise IOError('No checkpoint at {}'.format(best_ckpt_prefix))

    # Load the best trained weights.
    self._saver.restore(self._session, best_ckpt_prefix)

    graph = self._session.graph

    # Fetch the input nodes.
    handle_ph = graph.get_tensor_by_name(
        naming.Names.ITERATOR_HANDLE + ':0')

    # Fetch the output nodes. Only the last two entries of the output
    # collection are needed here.
    loss = graph.get_tensor_by_name(naming.Names.EVALUATION_LOSS + ':0')
    (_probabilities,
     _prediction,
     num_correct_predictions,
     batch_size) = graph.get_collection(naming.Names.OUTPUT_COLLECTION)

    # The dataset must be built "within" the session graph to be usable.
    # Only graph construction needs the default-graph context, so the
    # generator never suspends while holding it.
    with graph.as_default():
        dataset = tf_ds.evaluation_set_from_dataset_definition(dataset_def)
        iterator = dataset.make_one_shot_iterator()
        handle = self._session.run(iterator.string_handle())

    fetches = [loss, num_correct_predictions, batch_size]
    for batch_idx in count():
        try:
            loss_out, num_correct_out, batch_size_out = self._session.run(
                fetches, feed_dict={handle_ph: handle})
        except tf.errors.OutOfRangeError:
            # Iterator exhausted: normal end of the evaluation.
            break

        # batch_idx is 0-based, so valid indices are
        # 0..batches_per_epoch-1. Checking after a successful fetch flags
        # the first surplus batch itself and never reports an index for
        # a batch that does not exist.
        if batch_idx >= batches_per_epoch:
            logger.warning(
                'Batch index is {} but an epoch should only contain '
                '{} batches'.format(batch_idx, batches_per_epoch))

        yield batch_idx, loss_out, num_correct_out, batch_size_out