def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` once over ``instances`` and return its accumulated metrics.

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode before iterating.
    instances : ``Iterable[Instance]``
        The instances to evaluate on.
    data_iterator : ``DataIterator``
        Called to batch ``instances`` for one unshuffled epoch.
    cuda_device : ``int``
        Passed to ``check_for_gpu`` and used as the target when moving batches.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        Whatever ``model.get_metrics(reset=True)`` returns after the last batch.
    """
    check_for_gpu(cuda_device)
    underscore_warning_issued = False

    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = Tqdm.tqdm(batches,
                             total=data_iterator.get_num_batches(instances))

        for batch in progress:
            model(**util.move_to_device(batch, cuda_device))
            metrics = model.get_metrics()

            # Warn at most once per call about metrics hidden from the bar.
            if not underscore_warning_issued:
                if any(key.startswith("_") for key in metrics):
                    logger.warning("Metrics with names beginning with \"_\" will "
                                   "not be logged to the tqdm progress bar.")
                    underscore_warning_issued = True

            shown = ("%s: %.2f" % (key, val)
                     for key, val in metrics.items()
                     if not key.startswith("_"))
            progress.set_description(', '.join(shown) + " ||", refresh=False)

        return model.get_metrics(reset=True)
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` for a single epoch.

    The model is put in eval mode and every batch is moved to ``cuda_device``
    before the forward pass, all under ``torch.no_grad()``.  After each batch
    the progress bar description is refreshed with the current metrics,
    skipping any metric whose name starts with ``"_"``.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)`` taken after the final batch.
    """
    check_for_gpu(cuda_device)
    warned_about_hidden_metrics = False

    with torch.no_grad():
        model.eval()

        batch_stream = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        total_batches = data_iterator.get_num_batches(instances)
        bar = Tqdm.tqdm(batch_stream, total=total_batches)

        for batch in bar:
            on_device = util.move_to_device(batch, cuda_device)
            model(**on_device)

            current = model.get_metrics()
            has_hidden = (not warned_about_hidden_metrics
                          and any(label.startswith("_") for label in current))
            if has_hidden:
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                warned_about_hidden_metrics = True

            pieces = []
            for label, number in current.items():
                if label.startswith("_"):
                    continue
                pieces.append("%s: %.2f" % (label, number))
            bar.set_description(', '.join(pieces) + " ||", refresh=False)

        return model.get_metrics(reset=True)
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             serialization_dir: str,
             eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    """
    Debugging variant of ``evaluate``: run the model on the first batch only.

    The function prints a description of ``instances``, runs a single forward
    pass under ``torch.no_grad()``, prints the resulting output dictionary and
    returns it.  The previous version carried loss-accumulation statements
    after the ``return`` that could never execute; they have been removed.

    Parameters
    ----------
    model : ``Model``
        Model to run; it is put in eval mode first.
    instances : ``Iterable[Instance]``
        Instances to batch.
    data_iterator : ``DataIterator``
        Called to batch ``instances`` for one unshuffled epoch.
    cuda_device : ``int``
        Passed to ``check_for_gpu`` and used when moving the batch.
    serialization_dir : ``str``
        Directory that must not yet contain ``generations{eval_suffix}.jsonl``.
    eval_suffix : ``str``
        Suffix used in the generations file name checked above.
    batch_weight_key : ``str``
        Unused in this variant; kept for signature compatibility.

    Returns
    -------
    output_dict : ``Dict[str, Any]``
        The model output for the first batch, or an empty dict when the
        iterator yields no batch (previously an implicit ``None``).
    """
    print("inst", type(instances), dir(instances), instances)
    check_for_gpu(cuda_device)

    # NOTE(review): the pipeline is only consumed by the currently disabled
    # write_to_json call; it is still loaded so a missing spaCy model keeps
    # failing early, as before.
    nlp = spacy.load("en_core_web_lg")  # noqa: F841

    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))

    # TODO(ron): re-enable the pickled evaluation cache (kept under
    # data/goodnews or data/nytimes) if running time becomes a problem; the
    # original note says caching saves about 30 minutes.

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        for batch in generator_tqdm:
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            print(output_dict)
            # Deliberate early exit: only the first batch is inspected.
            return output_dict

    # No batch was produced at all.
    return {}
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             output_file: str = None,
             eval_type: str = None) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and optionally persist per-batch output.

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode first.
    instances : ``Iterable[Instance]``
        Instances to evaluate on.
    data_iterator : ``DataIterator``
        Called to batch ``instances`` for one epoch.
    cuda_device : ``int``
        Target device for ``move_to_device``.
    output_file : ``str``, optional
        When given, the file is opened for writing and every batch is handed
        to ``_persist_data`` together with the batch metadata.
    eval_type : ``str``, optional
        Forwarded unchanged to ``_persist_data``.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)`` after the last batch.
    """
    model.eval()

    batches = data_iterator(instances, num_epochs=1)
    logger.info("Iterating over dataset")
    progress = Tqdm.tqdm(batches,
                         total=data_iterator.get_num_batches(instances))

    with ExitStack() as stack:
        # The stack closes the output file on exit, if one was opened.
        file_handle = None
        if output_file is not None:
            file_handle = stack.enter_context(open(output_file, 'w'))

        for batch in progress:
            # Move the batch to the requested device before the forward pass.
            batch = move_to_device(batch, cuda_device)
            model_output = model(**batch)
            metrics = model.get_metrics()

            if file_handle:
                _persist_data(file_handle, batch.get("metadata"),
                              model_output, eval_type)

            summary = ', '.join("%s: %.2f" % pair for pair in metrics.items())
            progress.set_description(summary + " ||")

    return model.get_metrics(reset=True)
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             output_file: str = None) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and optionally persist per-batch output.

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode first.
    instances : ``Iterable[Instance]``
        Instances to evaluate on.
    data_iterator : ``DataIterator``
        Called with ``cuda_device`` to batch ``instances`` for one epoch.
    cuda_device : ``int``
        Forwarded to ``data_iterator``.
    output_file : ``str``, optional
        When given, the file is opened for writing and every batch is handed
        to ``_persist_data`` along with the index-to-token mapping of the
        ``"labels"`` vocabulary namespace.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics()`` after the last batch (metrics are not reset).
    """
    model.eval()

    iterator = data_iterator(instances, num_epochs=1, cuda_device=cuda_device)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))

    with ExitStack() as stack:
        if output_file is None:
            file_handle = None
            id2label = None
        else:
            file_handle = stack.enter_context(open(output_file, 'w'))
            # The label mapping does not depend on the batch, so look it up
            # once instead of once per batch.
            id2label = model.vocab.get_index_to_token_vocabulary("labels")

        for batch in generator_tqdm:
            model_output = model(**batch)
            metrics = model.get_metrics()

            if file_handle:
                _persist_data(file_handle, batch.get("metadata"),
                              model_output, id2label=id2label)

            description = ', '.join(["%s: %.2f" % (name, value)
                                     for name, value in metrics.items()]) + " ||"
            generator_tqdm.set_description(description)

    return model.get_metrics()
def evaluate(model: Model,
             instances: Iterable[Instance],
             task_name: str,
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model on a particular task (usually after training).

    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate.
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evaluate the model.
    task_name : ``str``, required
        The name of the task on which to evaluate the model.  It must be an
        entry of ``TASKS_NAME``.
    data_iterator : ``DataIterator``
        Iterator that goes through the dataset.
    cuda_device : ``int``
        Cuda device to use.  Batches are moved to this device (the previous
        code always moved them to device ``0``).

    Returns
    -------
    metrics : ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset, plus
        ``"stm_loss"``, the mean batch loss (``0.0`` when there is no batch).

    Raises
    ------
    ValueError
        If ``task_name`` is not in ``TASKS_NAME``.
    """
    check_for_gpu(cuda_device)

    # These lookups do not depend on the batch, so do them once up front.
    train_stages = ["stm", "sd", "valid"]
    task_index = TASKS_NAME.index(task_name)
    train_stage = train_stages.index("stm")

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0
        nb_batches = 0
        for tensor_batch in generator_tqdm:
            nb_batches += 1

            # Extra control inputs passed to the model's forward pass.
            tensor_batch['task_index'] = torch.tensor(task_index)
            tensor_batch["reverse"] = torch.tensor(False)
            tensor_batch['for_training'] = torch.tensor(False)
            tensor_batch['train_stage'] = torch.tensor(train_stage)
            tensor_batch = move_to_device(tensor_batch, cuda_device)

            eval_output_dict = model.forward(**tensor_batch)
            eval_loss += eval_output_dict["loss"].item()

            metrics = model.get_metrics(task_name=task_name)
            metrics["stm_loss"] = float(eval_loss / nb_batches)

            description = training_util.description_from_metrics(metrics)
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True)
        # max(..., 1) avoids a ZeroDivisionError when the dataset is empty.
        metrics["stm_loss"] = float(eval_loss / max(nb_batches, 1))
        return metrics
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             label_fname: str) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and write gold/predicted labels to CSV.

    For every instance one ``real_label,guessed_label`` row is written to
    ``label_fname``.  The guessed label is the argmax over axis 1 of the
    model's ``label_logits`` output.  The file is now managed by a ``with``
    block, so it is closed even when a batch raises.

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode first.
    instances : ``Iterable[Instance]``
        Instances to evaluate on.
    data_iterator : ``DataIterator``
        Called to batch ``instances`` for one unshuffled epoch.
    cuda_device : ``int``
        Passed to ``check_for_gpu`` and used when moving each batch.
    label_fname : ``str``
        Path of the CSV file to (over)write.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)`` after the last batch.
    """
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)

    with torch.no_grad():
        model.eval()

        with open(label_fname, 'w') as label_file:
            label_file.write('real_label,guessed_label\n')

            iterator = data_iterator(instances, num_epochs=1, shuffle=False)
            logger.info("Iterating over dataset")
            generator_tqdm = Tqdm.tqdm(
                iterator, total=data_iterator.get_num_batches(instances))

            total_num_inst = 0
            for batch in generator_tqdm:
                total_num_inst += batch['tokens']['tokens'].size(0)

                batch = util.move_to_device(batch, cuda_device)
                output_dict = model(**batch)

                # .cpu() returns the tensor unchanged when it already lives on
                # the CPU, so one code path covers both device settings.
                output_matrix = output_dict['label_logits'].data.cpu().numpy()
                output_labels = np.argmax(output_matrix, axis=1)
                true_labels = batch['label'].data.cpu().numpy()

                assert true_labels.shape[0] == output_labels.shape[0]
                label_file.writelines(
                    f"{int(gold)},{int(guess)}\n"
                    for gold, guess in zip(true_labels, output_labels))

                metrics = model.get_metrics()
                if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                    logger.warning("Metrics with names beginning with \"_\" will "
                                   "not be logged to the tqdm progress bar.")
                    _warned_tqdm_ignores_underscores = True

                description = ', '.join(
                    ["%s: %.2f" % (name, value)
                     for name, value in metrics.items()
                     if not name.startswith("_")]) + " ||"
                generator_tqdm.set_description(description, refresh=False)

            print("NUM INSTANCES ITERATED OVER: " + str(total_num_inst))

        return model.get_metrics(reset=True)
def evaluate(model: Model,
             instances: Iterable[Instance],
             task_name: str,
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model on a particular task (usually after training).

    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate.
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evaluate the model.
    task_name : ``str``, required
        The name of the task on which to evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that goes through the dataset.
    cuda_device : ``int``
        Cuda device to use.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset, plus
        ``"loss"``, the mean batch loss (``0.0`` when there is no batch).
    """
    check_for_gpu(cuda_device)

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0
        nb_batches = 0
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            nb_batches += 1

            eval_output_dict = model.forward(task_name=task_name,
                                             tensor_batch=batch)
            eval_loss += eval_output_dict["loss"].item()

            metrics = model.get_metrics(task_name=task_name)
            metrics["loss"] = float(eval_loss / nb_batches)

            description = ", ".join([
                "%s: %.2f" % (name, value) for name, value in metrics.items()
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True, full=True)
        # max(..., 1) avoids a ZeroDivisionError when the dataset is empty.
        metrics["loss"] = float(eval_loss / max(nb_batches, 1))
        return metrics
def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``dataset`` and split the examples by correctness.

    For every instance the premise and hypothesis tokens are joined into
    sentences and stored next to the gold label, the predicted label (argmax
    of ``label_logits`` mapped through a fixed index-to-name table) and the
    prediction score (max of ``label_probs``).

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode first.
    dataset : ``Dataset``
        Dataset to evaluate on.
    iterator : ``DataIterator``
        Called to batch ``dataset`` for one epoch; it must yield
        ``(raw_batch, batch)`` pairs.
    cuda_device : ``int``
        Forwarded to ``arrays_to_variables``.

    Returns
    -------
    (metrics, hard_subset, easy_subset)
        NOTE(review): despite the ``Dict[str, Any]`` annotation this function
        returns a 3-tuple: ``model.get_metrics()``, a ``DataFrame`` with the
        rows whose prediction differs from the gold label, and a ``DataFrame``
        with the rows whose prediction matches.  Both frames are empty when
        the iterator yields no batch.
    """
    # The index-to-name table never changes, so build it once.
    INVERSE_LABEL_MAP = {
        0: "entailment",
        1: "neutral",
        2: "contradiction",
        3: "hidden"
    }

    model.eval()

    generator = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator,
                               total=iterator.get_num_batches(dataset))

    # Collect per-batch frames and concatenate once after the loop instead of
    # re-copying the growing frame for every batch.  The leading empty frame
    # keeps the result the same as the previous incremental concatenation.
    frames = [pd.DataFrame()]

    for raw_batch, batch in generator_tqdm:
        parsed_fields = pd.DataFrame([
            {
                "sentence1": " ".join([x.text for x in fields['premise'].tokens]),
                "sentence2": " ".join([x.text for x in fields['hypothesis'].tokens]),
                "gold_label": fields['label'].label,
            }
            for fields in (instance.fields for instance in raw_batch.instances)
        ])

        tensor_batch = arrays_to_variables(batch, cuda_device,
                                           for_training=False)
        bo = model.forward(**tensor_batch)

        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value)
             for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

        # .cpu() is required before .numpy() when the model runs on a GPU and
        # is a no-op for tensors that already live on the CPU.
        batch_output = pd.DataFrame()
        batch_output['prediction_label'] = (
            bo['label_logits'].data.cpu().numpy().argmax(axis=1))
        batch_output['prediction_score'] = (
            bo['label_probs'].data.cpu().numpy().max(axis=1))
        batch_output['prediction_label'] = batch_output.prediction_label.apply(
            lambda x: INVERSE_LABEL_MAP[x])

        frames.append(pd.concat([parsed_fields, batch_output], axis=1))

    output = pd.concat(frames, axis=0)

    if len(frames) == 1:
        # No batch was processed: there are no label columns to compare.
        return model.get_metrics(), output, output.copy()

    hard_subset = output.loc[output.gold_label != output.prediction_label]
    easy_subset = output.loc[output.gold_label == output.prediction_label]
    return model.get_metrics(), hard_subset, easy_subset
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and report the mean batch loss.

    While iterating, the progress bar shows the current metrics together with
    the loss of the latest batch; metrics whose name starts with ``"_"`` are
    hidden from the bar.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)``.  When the model produced a loss,
        ``"loss"`` is set to the summed batch losses divided by the number
        of batches.

    Raises
    ------
    RuntimeError
        If the model produced a loss for some batches but not for all.
    """
    check_for_gpu(cuda_device)
    warning_emitted = False

    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = Tqdm.tqdm(batches,
                             total=data_iterator.get_num_batches(instances))

        seen_batches = 0
        batches_with_loss = 0
        loss_sum = 0.0

        for batch in progress:
            seen_batches += 1
            batch = util.move_to_device(batch, cuda_device)
            loss = model(**batch).get("loss")

            metrics = model.get_metrics()
            if loss is not None:
                batches_with_loss += 1
                metrics["loss"] = loss.item()
                loss_sum += loss.item()

            if not warning_emitted:
                if any(key.startswith("_") for key in metrics):
                    logger.warning("Metrics with names beginning with \"_\" will "
                                   "not be logged to the tqdm progress bar.")
                    warning_emitted = True

            visible = ("%s: %.2f" % (key, val)
                       for key, val in metrics.items()
                       if not key.startswith("_"))
            progress.set_description(', '.join(visible) + " ||", refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if batches_with_loss == 0:
            return final_metrics

        # A loss must be present for every batch or for none.
        if batches_with_loss != seen_batches:
            raise RuntimeError("The model you are trying to evaluate only sometimes " +
                               "produced a loss!")
        final_metrics["loss"] = loss_sum/seen_batches
        return final_metrics
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` in eval mode over ``instances`` for a single epoch.

    ``cuda_device`` and ``for_training=False`` are forwarded to
    ``data_iterator``; the batches it yields are fed to the model as keyword
    arguments.  The progress bar description shows the metrics after each
    batch.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics()`` after the last batch (metrics are not reset).
    """
    model.eval()

    batches = data_iterator(instances,
                            num_epochs=1,
                            cuda_device=cuda_device,
                            for_training=False)
    logger.info("Iterating over dataset")
    progress = Tqdm.tqdm(batches,
                         total=data_iterator.get_num_batches(instances))

    for batch in progress:
        model(**batch)
        snapshot = model.get_metrics()
        summary = ', '.join("%s: %.2f" % pair for pair in snapshot.items())
        progress.set_description(summary + " ||", refresh=False)

    return model.get_metrics()
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` in eval mode over ``instances`` and reset its metrics.

    ``cuda_device`` and ``for_training=False`` are forwarded to
    ``data_iterator``; the batches it yields are fed to the model as keyword
    arguments.  The progress bar description shows the metrics after each
    batch.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)`` after the last batch.
    """
    model.eval()

    batch_stream = data_iterator(instances,
                                 num_epochs=1,
                                 cuda_device=cuda_device,
                                 for_training=False)
    logger.info("Iterating over dataset")
    bar = Tqdm.tqdm(batch_stream,
                    total=data_iterator.get_num_batches(instances))

    for batch in bar:
        model(**batch)
        parts = []
        for label, number in model.get_metrics().items():
            parts.append("%s: %.2f" % (label, number))
        bar.set_description(', '.join(parts) + " ||", refresh=False)

    return model.get_metrics(reset=True)
def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` in eval mode over ``dataset`` for a single epoch.

    Each batch is converted with ``arrays_to_variables`` (using
    ``cuda_device`` and ``for_training=False``) before being passed to
    ``model.forward``.  The progress bar description shows the metrics after
    each batch.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics()`` after the last batch (metrics are not reset).
    """
    model.eval()

    batches = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    progress = tqdm.tqdm(batches, total=iterator.get_num_batches(dataset))

    for batch in progress:
        model_inputs = arrays_to_variables(batch, cuda_device,
                                           for_training=False)
        model.forward(**model_inputs)

        snapshot = model.get_metrics()
        summary = ', '.join("%s: %.2f" % pair for pair in snapshot.items())
        progress.set_description(summary + " ||")

    return model.get_metrics()
def get_model_predictions(model: Model,
                          instances: Iterable[Instance],
                          data_iterator: DataIterator,
                          cuda_device: int) -> (Dict[str, Any], List):
    """
    Collect the decoded ``"tags"`` predictions of ``model`` over ``instances``.

    ``cuda_device`` and ``for_training=False`` are forwarded to
    ``data_iterator``.  The output of every batch is passed through
    ``model.decode`` and the ``"tags"`` entry of the decoded result is
    appended to a flat list.

    Returns
    -------
    (metrics, predictions)
        ``model.get_metrics()`` after the last batch, and the list of all
        collected ``"tags"`` entries in iteration order.
    """
    model.eval()
    model_predictions = []

    batches = data_iterator(instances,
                            num_epochs=1,
                            cuda_device=cuda_device,
                            for_training=False)
    logger.info("Iterating over dataset")
    progress = Tqdm.tqdm(batches,
                         total=data_iterator.get_num_batches(instances))

    for batch in progress:
        decoded = model.decode(model(**batch))
        model_predictions.extend(decoded["tags"])

    return model.get_metrics(), model_predictions
def evaluate(model: Model,
             dataset: InstanceCollection,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    """
    Run ``model`` in eval mode over ``dataset`` for a single epoch.

    ``cuda_device`` and ``for_training=False`` are forwarded to ``iterator``;
    the batches it yields are fed to the model as keyword arguments.  The
    progress bar description shows the metrics after each batch.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics()`` after the last batch (metrics are not reset).
    """
    model.eval()

    batches = iterator(dataset,
                       num_epochs=1,
                       cuda_device=cuda_device,
                       for_training=False)
    logger.info("Iterating over dataset")
    progress = tqdm.tqdm(batches, total=iterator.get_num_batches(dataset))

    for batch in progress:
        model(**batch)
        snapshot = model.get_metrics()
        summary = ', '.join("%s: %.2f" % pair for pair in snapshot.items())
        progress.set_description(summary + " ||")

    return model.get_metrics()
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and report a weighted average loss.

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode first.
    instances : ``Iterable[Instance]``
        Instances to evaluate on.
    data_iterator : ``DataIterator``
        Called to batch ``instances`` for one unshuffled epoch.
    cuda_device : ``int``
        Passed to ``check_for_gpu`` and used when moving each batch.
    batch_weight_key : ``str``
        If truthy, the key in the model output holding the weight of the
        batch; otherwise every batch has weight ``1.0``.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)``.  When the model produced a loss,
        ``"loss"`` is set to the weighted loss sum divided by the weight sum.

    Raises
    ------
    RuntimeError
        If the model produced a loss for some batches but not for all.
    """
    check_for_gpu(cuda_device)
    warning_emitted = False

    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = Tqdm.tqdm(batches,
                             total=data_iterator.get_num_batches(instances))

        seen_batches = 0        # batches iterated over
        batches_with_loss = 0   # batches for which the model returned a loss
        weighted_loss_sum = 0.0
        weight_sum = 0.0

        for batch in progress:
            seen_batches += 1
            batch = util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                batches_with_loss += 1
                weight = (output_dict[batch_weight_key].item()
                          if batch_weight_key else 1.0)
                weight_sum += weight
                weighted_loss_sum += loss.item() * weight
                # Show the running average rather than the latest batch loss.
                metrics["loss"] = weighted_loss_sum / weight_sum

            if not warning_emitted:
                if any(key.startswith("_") for key in metrics):
                    logger.warning("Metrics with names beginning with \"_\" will "
                                   "not be logged to the tqdm progress bar.")
                    warning_emitted = True

            visible = ("%s: %.2f" % (key, val)
                       for key, val in metrics.items()
                       if not key.startswith("_"))
            progress.set_description(', '.join(visible) + " ||", refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if batches_with_loss == 0:
            return final_metrics

        # Sanity check: a loss must be present for every batch or for none.
        if batches_with_loss != seen_batches:
            raise RuntimeError("The model you are trying to evaluate only sometimes " +
                               "produced a loss!")
        final_metrics["loss"] = weighted_loss_sum / weight_sum
        return final_metrics
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and report a weighted average loss.

    The progress bar shows the current metrics with four decimals; metrics
    whose name starts with ``"_"`` are hidden and trigger a warning that is
    tracked through ``HasBeenWarned.tqdm_ignores_underscores``.

    Parameters
    ----------
    batch_weight_key : ``str``
        If truthy, the key in the model output holding the weight of the
        batch; otherwise every batch has weight ``1.0``.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)``.  When the model produced a loss,
        ``"loss"`` is set to the weighted loss sum divided by the weight sum.

    Raises
    ------
    RuntimeError
        If the model produced a loss for some batches but not for all.
    """
    check_for_gpu(cuda_device)

    with torch.no_grad():
        model.eval()

        batches = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        progress = Tqdm.tqdm(batches,
                             total=data_iterator.get_num_batches(instances))

        seen_batches = 0        # batches iterated over
        batches_with_loss = 0   # batches for which the model returned a loss
        weighted_loss_sum = 0.0
        weight_sum = 0.0

        for batch in progress:
            seen_batches += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            # NOTE: a disabled debugging block used to sit here; it pickled the
            # class probabilities, logits and loss of every batch to
            # ../data/test/<batch_count>_output.pkl.

            metrics = model.get_metrics()

            if loss is not None:
                batches_with_loss += 1
                weight = (output_dict[batch_weight_key].item()
                          if batch_weight_key else 1.0)
                weight_sum += weight
                weighted_loss_sum += loss.item() * weight
                # Show the running average rather than the latest batch loss.
                metrics["loss"] = weighted_loss_sum / weight_sum

            if not HasBeenWarned.tqdm_ignores_underscores:
                if any(key.startswith("_") for key in metrics):
                    logger.warning("Metrics with names beginning with \"_\" will "
                                   "not be logged to the tqdm progress bar.")
                    HasBeenWarned.tqdm_ignores_underscores = True

            visible = ("%s: %.4f" % (key, val)
                       for key, val in metrics.items()
                       if not key.startswith("_"))
            progress.set_description(', '.join(visible) + " ||", refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if batches_with_loss == 0:
            return final_metrics

        # Sanity check: a loss must be present for every batch or for none.
        if batches_with_loss != seen_batches:
            raise RuntimeError(
                "The model you are trying to evaluate only sometimes " +
                "produced a loss!")
        final_metrics["loss"] = weighted_loss_sum / weight_sum
        return final_metrics
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances`` and report a weighted average loss.

    The progress bar shows the current metrics; metrics whose name starts
    with ``"_"`` are hidden and trigger a warning that is tracked through
    ``HasBeenWarned.tqdm_ignores_underscores``.

    Parameters
    ----------
    batch_weight_key : ``str``
        If truthy, the key in the model output holding the weight of the
        batch; otherwise every batch has weight ``1.0``.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)``.  When the model produced a loss,
        ``"loss"`` is set to the weighted loss sum divided by the weight sum.

    Raises
    ------
    RuntimeError
        If the model produced a loss for some batches but not for all.
    """
    check_for_gpu(cuda_device)

    with torch.no_grad():
        model.eval()

        batch_stream = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        bar = Tqdm.tqdm(batch_stream,
                        total=data_iterator.get_num_batches(instances))

        n_batches = 0       # batches iterated over
        n_with_loss = 0     # batches for which the model returned a loss
        loss_acc = 0.0      # cumulative weighted loss
        weight_acc = 0.0    # cumulative weight

        for batch in bar:
            n_batches += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                n_with_loss += 1
                if batch_weight_key:
                    batch_weight = output_dict[batch_weight_key].item()
                else:
                    batch_weight = 1.0
                weight_acc += batch_weight
                loss_acc += loss.item() * batch_weight
                # Show the running average rather than the latest batch loss.
                metrics["loss"] = loss_acc / weight_acc

            needs_warning = (not HasBeenWarned.tqdm_ignores_underscores
                             and any(label.startswith("_") for label in metrics))
            if needs_warning:
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True

            pieces = []
            for label, number in metrics.items():
                if label.startswith("_"):
                    continue
                pieces.append("%s: %.2f" % (label, number))
            bar.set_description(', '.join(pieces) + " ||", refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if n_with_loss > 0:
            # Sanity check: a loss must be present for every batch or for none.
            if n_with_loss != n_batches:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = loss_acc / weight_acc

        return final_metrics
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             serialization_dir: str,
             eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    """
    Evaluate ``model`` on ``instances``, writing each batch via ``write_to_json``.

    Parameters
    ----------
    model : ``Model``
        Model to evaluate; it is put in eval mode first.
    instances : ``Iterable[Instance]``
        Instances to evaluate on.
    data_iterator : ``DataIterator``
        Called to batch ``instances`` for one unshuffled epoch.
    cuda_device : ``int``
        Passed to ``check_for_gpu`` and used when moving each batch.
    serialization_dir : ``str``
        Passed to ``write_to_json``; must not yet contain
        ``generations{eval_suffix}.jsonl``.  If the path contains
        ``"goodnews"`` or ``"nytimes"``, the matching pickled evaluation
        cache is loaded and, if it did not exist, written afterwards.  For
        any other path evaluation runs without an on-disk cache (previously
        this raised ``UnboundLocalError``).
    eval_suffix : ``str``
        Suffix of the generations file; also passed to ``write_to_json``.
    batch_weight_key : ``str``
        If truthy, the key in the model output holding the weight of the
        batch; otherwise every batch has weight ``1.0``.

    Returns
    -------
    metrics : ``Dict[str, Any]``
        ``model.get_metrics(reset=True)``.  When the model produced a loss
        for at least one batch, ``"loss"`` is set to the weighted loss sum
        divided by the weight sum.
    """
    check_for_gpu(cuda_device)
    nlp = spacy.load("en_core_web_lg")
    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))

    # Caching saves us an extra 30 minutes.
    if 'goodnews' in serialization_dir:
        cache_path = 'data/goodnews/evaluation_cache.pkl'
    elif 'nytimes' in serialization_dir:
        cache_path = 'data/nytimes/evaluation_cache.pkl'
    else:
        # Unknown dataset: run without an on-disk cache.
        cache_path = None

    if cache_path is not None and os.path.exists(cache_path):
        # NOTE(review): pickle.load can execute arbitrary code; the cache file
        # must come from a trusted source.
        with open(cache_path, 'rb') as f:
            cache = pickle.load(f)
    else:
        cache = {}

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            # presumably write_to_json fills ``cache`` as a side effect —
            # TODO confirm against its definition.
            write_to_json(output_dict, serialization_dir, nlp, eval_suffix,
                          cache)

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True

            description = ', '.join([
                "%s: %.2f" % (name, value) for name, value in metrics.items()
                if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # The "loss for every batch or for none" sanity check is
            # deliberately disabled in this variant.
            final_metrics["loss"] = total_loss / total_weight

    # Persist the cache only when a cache location exists and nothing has
    # been written there yet.
    if cache_path is not None and not os.path.exists(cache_path):
        with open(cache_path, 'wb') as f:
            pickle.dump(cache, f)

    return final_metrics