Code example #1
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            model(**batch)
            metrics = model.get_metrics()
            if (not _warned_tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join([
                "%s: %.2f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        return model.get_metrics(reset=True)
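For context, here is a minimal sketch of how an `evaluate` function in this style is typically wired up under the old AllenNLP 0.x API. The archive and test-file paths are placeholders, and `evaluate` refers to the function above:

from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.iterators import BasicIterator
from allennlp.models.archival import load_archive

# Placeholder paths; substitute a real trained archive and test file.
archive = load_archive("model.tar.gz", cuda_device=-1)
model = archive.model

# Rebuild the dataset reader from the archived config.
reader = DatasetReader.from_params(archive.config.pop("dataset_reader"))
instances = reader.read("test.jsonl")

iterator = BasicIterator(batch_size=32)
iterator.index_with(model.vocab)

metrics = evaluate(model, instances, iterator, cuda_device=-1)
print(metrics)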
Code example #2
File: evaluate.py Project: ziaridoy20/allennlp
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            model(**batch)
            metrics = model.get_metrics()
            if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        return model.get_metrics(reset=True)
Code example #3
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int,
             serialization_dir: str, eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    print("inst", type(instances), dir(instances), instances)
    check_for_gpu(cuda_device)
    nlp = spacy.load("en_core_web_lg")
    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))

    # TODO: caching could be helpful if we run into runtime problems;
    # it saves an extra 30 minutes.
    # if 'goodnews' in serialization_dir:
    #     cache_path = 'data/goodnews/evaluation_cache.pkl'
    # elif 'nytimes' in serialization_dir:
    #     cache_path = 'data/nytimes/evaluation_cache.pkl'
    # if os.path.exists(cache_path):
    #     with open(cache_path, 'rb') as f:
    #         cache = pickle.load(f)
    # else:
    #     cache = {}

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")
            # write_to_json(output_dict, serialization_dir,
            #               nlp, eval_suffix, cache)

            # metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight

        # Truncated in the source; the return below follows the pattern of
        # the sibling variants (see code example #19).
        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            final_metrics["loss"] = total_loss / total_weight
        return final_metrics
Code example #4
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             output_file: str = None,
             eval_type: str = None) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    with ExitStack() as stack:
        if output_file is None:
            file_handle = None
        else:
            file_handle = stack.enter_context(open(output_file, 'w'))
        for batch in generator_tqdm:
            # Make the batch CUDA-compatible (move to GPU if needed).
            batch = move_to_device(batch, cuda_device)

            model_output = model(**batch)
            metrics = model.get_metrics()
            if file_handle:
                _persist_data(file_handle, batch.get("metadata"), model_output,
                              eval_type)
            description = ', '.join([
                "%s: %.2f" % (name, value) for name, value in metrics.items()
            ]) + " ||"
            generator_tqdm.set_description(description)

    return model.get_metrics(reset=True)
Code example #5
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             output_file: str = None) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1, cuda_device=cuda_device)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    with ExitStack() as stack:
        if output_file is None:
            file_handle = None
        else:
            file_handle = stack.enter_context(open(output_file, 'w'))
        for batch in generator_tqdm:
            model_output = model(**batch)
            metrics = model.get_metrics()
            if file_handle:
                id2label = model.vocab.get_index_to_token_vocabulary("labels")
                _persist_data(file_handle, batch.get("metadata"), model_output, id2label=id2label)
            description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
            generator_tqdm.set_description(description)

    return model.get_metrics()
Code example #6
def evaluate(model: Model, instances: Iterable[Instance], task_name: str,
             data_iterator: DataIterator, cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model for a particular tasks (usually after training).
    
    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evalute the model.
    task_name : ``str``, required
        The name of the tasks on which evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that go through the dataset.
    cuda_device : ``int``
        Cuda device to use.
        
    Returns
    -------
    metrics :  ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset.
    """
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0
        nb_batches = 0
        for tensor_batch in generator_tqdm:
            nb_batches += 1

            train_stages = ["stm", "sd", "valid"]
            task_index = TASKS_NAME.index(task_name)
            tensor_batch['task_index'] = torch.tensor(task_index)
            tensor_batch["reverse"] = torch.tensor(False)
            tensor_batch['for_training'] = torch.tensor(False)
            train_stage = train_stages.index("stm")
            tensor_batch['train_stage'] = torch.tensor(train_stage)
            tensor_batch = move_to_device(tensor_batch, cuda_device)

            eval_output_dict = model.forward(**tensor_batch)
            loss = eval_output_dict["loss"]
            eval_loss += loss.item()
            metrics = model.get_metrics(task_name=task_name)
            metrics["stm_loss"] = float(eval_loss / nb_batches)

            description = training_util.description_from_metrics(metrics)
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True)
        metrics["stm_loss"] = float(eval_loss / nb_batches)
        return metrics
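Each batch in this multi-task variant is augmented with scalar control tensors before the forward pass. In isolation the pattern looks like this (a sketch; the tensor values are illustrative, the keys are the ones injected by the loop above):

import torch

# A toy batch; real batches come from the DataIterator.
batch = {"tokens": {"tokens": torch.zeros(2, 5, dtype=torch.long)}}

# Scalar control flags, matching the keys the loop above injects.
batch["task_index"] = torch.tensor(1)
batch["reverse"] = torch.tensor(False)
batch["for_training"] = torch.tensor(False)
batch["train_stage"] = torch.tensor(0)  # index of "stm" in ["stm", "sd", "valid"]

print({k: v for k, v in batch.items() if k != "tokens"})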
Code example #7
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             label_fname: str) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        label_file = open(label_fname, 'w')
        label_file.write('real_label,guessed_label\n')

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
        total_num_inst = 0
        for batch in generator_tqdm:
            num_inst = batch['tokens']['tokens'].size(0)
            total_num_inst += num_inst
            batch = util.move_to_device(batch, cuda_device)

            output_dict = model(**batch)
            if cuda_device == -1:
                output_matrix = output_dict['label_logits'].data.numpy()
            else:
                output_matrix = output_dict['label_logits'].data.cpu().numpy()
            output_labels = np.argmax(output_matrix, axis=1)
            if cuda_device == -1:
                true_labels = batch['label'].data.numpy()
            else:
                true_labels = batch['label'].data.cpu().numpy()
            assert true_labels.shape[0] == output_labels.shape[0]
            for i in range(true_labels.shape[0]):
                label_file.write(str(int(true_labels[i])) + ',')
                label_file.write(str(int(output_labels[i])) + '\n')

            metrics = model.get_metrics()
            if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)


        print("NUM INSTANCES ITERATED OVER: " + str(total_num_inst))
        label_file.close()

        return model.get_metrics(reset=True)
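Because this variant writes a `real_label,guessed_label` CSV, a small follow-up script (a sketch, not part of the source; the file name is a placeholder) can turn that file into an accuracy figure and rough confusion counts:

import csv
from collections import Counter

# Tally (real, guessed) pairs from the CSV written by evaluate() above.
confusion = Counter()
with open("labels.csv") as f:
    for row in csv.DictReader(f):
        confusion[(row["real_label"], row["guessed_label"])] += 1

total = sum(confusion.values())
correct = sum(n for (real, guessed), n in confusion.items() if real == guessed)
print("accuracy: %.4f" % (correct / total))
for (real, guessed), n in sorted(confusion.items()):
    print("real=%s guessed=%s: %d" % (real, guessed, n))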
Code example #8
File: evaluate.py Project: yueyedeai/hmtl
def evaluate(model: Model, instances: Iterable[Instance], task_name: str,
             data_iterator: DataIterator, cuda_device: int) -> Dict[str, Any]:
    """
    Evaluate a model for a particular task (usually after training).
    
    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evalute the model.
    task_name : ``str``, required
        The name of the task on which evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that go through the dataset.
    cuda_device : ``int``
        Cuda device to use.
        
    Returns
    -------
    metrics :  ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset.
    """
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0
        nb_batches = 0
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            nb_batches += 1

            eval_output_dict = model.forward(task_name=task_name,
                                             tensor_batch=batch)
            loss = eval_output_dict["loss"]
            eval_loss += loss.item()
            metrics = model.get_metrics(task_name=task_name)
            metrics["loss"] = float(eval_loss / nb_batches)

            description = ", ".join([
                "%s: %.2f" % (name, value) for name, value in metrics.items()
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True, full=True)
        metrics["loss"] = float(eval_loss / nb_batches)
        return metrics
Code example #9
File: evaluate.py Project: uwnlp/allennlp
def evaluate(model: Model, dataset: Dataset, iterator: DataIterator,
             cuda_device: int) -> Tuple[Dict[str, Any], pd.DataFrame, pd.DataFrame]:
    model.eval()

    generator = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator,
                               total=iterator.get_num_batches(dataset))
    output = pd.DataFrame()
    for raw_batch, batch in generator_tqdm:
        raw_fields = [x.fields for x in raw_batch.instances]
        parsed_fields = []

        for item in raw_fields:
            premise = " ".join([x.text for x in item['premise'].tokens])
            hypothesis = " ".join([x.text for x in item['hypothesis'].tokens])
            label = item['label'].label
            parsed_fields.append({
                "sentence1": premise,
                "sentence2": hypothesis,
                "gold_label": label
            })
        parsed_fields = pd.DataFrame(parsed_fields)
        tensor_batch = arrays_to_variables(batch,
                                           cuda_device,
                                           for_training=False)
        bo = model.forward(**tensor_batch)
        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value)
             for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)
        batch_output = pd.DataFrame()
        INVERSE_LABEL_MAP = {
            0: "entailment",
            1: "neutral",
            2: "contradiction",
            3: "hidden"
        }
        batch_output['prediction_label'] = bo['label_logits'].data.numpy().argmax(axis=1)
        batch_output['prediction_score'] = bo['label_probs'].data.numpy().max(axis=1)
        batch_output['prediction_label'] = batch_output.prediction_label.apply(
            lambda x: INVERSE_LABEL_MAP[x])
        parsed_output = pd.concat([parsed_fields, batch_output], axis=1)
        output = pd.concat([output, parsed_output], axis=0)
    hard_subset = output.loc[output.gold_label != output.prediction_label]
    easy_subset = output.loc[output.gold_label == output.prediction_label]
    return model.get_metrics(), hard_subset, easy_subset
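This variant returns the metrics together with the misclassified (`hard_subset`) and correctly classified (`easy_subset`) DataFrames. A sketch of persisting those splits (file names are placeholders; `model`, `dataset`, and `iterator` are assumed to be set up as in the earlier examples):

metrics, hard_subset, easy_subset = evaluate(model, dataset, iterator, cuda_device=-1)

# Save the error-analysis splits as SNLI-style JSON lines.
hard_subset.to_json("hard_examples.jsonl", orient="records", lines=True)
easy_subset.to_json("easy_examples.jsonl", orient="records", lines=True)
print(metrics)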
Code example #10
File: evaluate.py Project: zwdcs/allennlp
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

        batch_count = 0
        loss_count = 0
        total_loss = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = util.move_to_device(batch, cuda_device)
            loss = model(**batch).get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                metrics["loss"] = loss.item()
                total_loss += loss.item()

            if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss/batch_count

        return final_metrics
Code example #11
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1, cuda_device=cuda_device, for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description, refresh=False)

    return model.get_metrics()
Code example #12
File: evaluate.py Project: Jordan-Sauchuk/allennlp
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1, cuda_device=cuda_device, for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description, refresh=False)

    return model.get_metrics(reset=True)
Code example #13
def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    generator = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator, total=iterator.get_num_batches(dataset))
    for batch in generator_tqdm:
        tensor_batch = arrays_to_variables(batch, cuda_device, for_training=False)
        model.forward(**tensor_batch)
        metrics = model.get_metrics()
        description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

    return model.get_metrics()
Code example #14
def get_model_predictions(model: Model, instances: Iterable[Instance],
                          data_iterator: DataIterator,
                          cuda_device: int) -> Tuple[Dict[str, Any], List]:

    model.eval()
    model_predictions = []

    iterator = data_iterator(instances,
                             num_epochs=1,
                             cuda_device=cuda_device,
                             for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    return model.get_metrics(), model_predictions
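Unlike the other variants, this helper returns the decoded tag sequences alongside the metrics. A usage sketch, assuming a tagging model whose `decode` output carries a `tags` key as above (`test_instances` and `iterator` are placeholders):

metrics, predictions = get_model_predictions(
    model, test_instances, iterator, cuda_device=-1)

# One list of tag strings per instance.
with open("predictions.txt", "w") as f:
    for tags in predictions:
        f.write(" ".join(tags) + "\n")
print(metrics)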
Code example #15
File: evaluate.py Project: vasantteja/allennlp
def evaluate(model: Model, dataset: InstanceCollection, iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    generator = iterator(dataset,
                         num_epochs=1,
                         cuda_device=cuda_device,
                         for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator,
                               total=iterator.get_num_batches(dataset))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value)
             for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

    return model.get_metrics()
Code example #16
File: evaluate.py Project: apmoore1/allennlp
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
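The `batch_weight_key` logic above computes a weighted rather than simple average of per-batch losses. A standalone sketch of just that arithmetic, with made-up numbers:

# (batch_loss, batch_weight) pairs; with batch_weight_key unset, every
# weight is 1.0 and the weighted average reduces to a plain mean.
batches = [(0.50, 32.0), (0.80, 32.0), (0.20, 8.0)]

total_loss = sum(loss * weight for loss, weight in batches)
total_weight = sum(weight for _, weight in batches)

print(total_loss / total_weight)                        # 0.6 (weighted)
print(sum(loss for loss, _ in batches) / len(batches))  # 0.5 (unweighted)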
Code example #17
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            # Uncomment this block to save class_probabilities, logits, and
            # losses for each batch:
            # print(output_dict['class_probabilities'].shape)
            # import copy
            #
            # newoutput_dict = copy.deepcopy(output_dict)
            # newoutput_dict['class_probabilities'] = newoutput_dict['class_probabilities'].cpu().data.numpy()
            # newoutput_dict['logits'] = newoutput_dict['logits'].cpu().data.numpy()
            # newoutput_dict['loss'] = newoutput_dict['loss'].cpu().data.numpy()
            #
            # output_file = os.path.join(os.path.dirname(__file__), '..', "data", "test",
            #                            str(batch_count) + "_output.pkl")
            # import json
            # import pickle
            # if output_file:
            #     with open(output_file, "wb") as file:
            #         pickle.dump(newoutput_dict, file)
            #     file.close()
            # ###########################################################################################################

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join([
                "%s: %.4f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes " +
                    "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
Code example #18
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
Code example #19
File: evaluate.py Project: shlomota/newscaptioning
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int,
             serialization_dir: str, eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    nlp = spacy.load("en_core_web_lg")
    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))

    # Caching saves an extra 30 minutes.
    if 'goodnews' in serialization_dir:
        cache_path = 'data/goodnews/evaluation_cache.pkl'
    elif 'nytimes' in serialization_dir:
        cache_path = 'data/nytimes/evaluation_cache.pkl'
    else:
        # Guard against an unbound cache_path further down.
        raise ValueError(f'Unrecognized serialization_dir: {serialization_dir}')
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            cache = pickle.load(f)
    else:
        cache = {}

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            write_to_json(output_dict, serialization_dir, nlp, eval_suffix,
                          cache)

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join([
                "%s: %.2f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            # if loss_count != batch_count:
            #     raise RuntimeError("The model you are trying to evaluate only sometimes " +
            #                        "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

    if not os.path.exists(cache_path):
        with open(cache_path, 'wb') as f:
            pickle.dump(cache, f)

    return final_metrics