Example 1
from typing import Dict

from allennlp.models import Model


def get_metrics(model: Model, total_loss: float, num_batches: int,
                reset: bool = False) -> Dict[str, float]:
    """
    Gets the metrics but sets ``"loss"`` to
    the total loss divided by the ``num_batches`` so that
    the ``"loss"`` metric is "average loss per batch".
    """
    metrics = model.get_metrics(reset=reset)
    metrics["loss"] = float(total_loss / num_batches) if num_batches > 0 else 0.0
    return metrics
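A minimal usage sketch (not part of the original example): it assumes an AllenNLP 0.x setup where ``model``, ``iterator``, and ``instances`` already exist and each forward pass produces a ``"loss"`` key.

# Hedged sketch: ``model``, ``iterator``, and ``instances`` are assumed to be
# an already-built AllenNLP 0.x Model, DataIterator, and list of Instances.
total_loss, num_batches = 0.0, 0
for batch in iterator(instances, num_epochs=1, shuffle=False):
    output_dict = model(**batch)  # assumes the batch carries labels,
    total_loss += output_dict["loss"].item()  # so a "loss" key is produced
    num_batches += 1

metrics = get_metrics(model, total_loss, num_batches, reset=True)
print(metrics["loss"])  # average loss per batch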
Example 2

import logging
from typing import Any, Dict, Iterable, List, Tuple

from allennlp.common.tqdm import Tqdm
from allennlp.data import Instance
from allennlp.data.iterators import DataIterator
from allennlp.models import Model

logger = logging.getLogger(__name__)


def get_model_predictions(model: Model, instances: Iterable[Instance],
                          data_iterator: DataIterator,
                          cuda_device: int) -> Tuple[Dict[str, Any], List]:

    model.eval()
    model_predictions = []

    iterator = data_iterator(instances,
                             num_epochs=1,
                             cuda_device=cuda_device,
                             for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        # Forward pass, then decode the raw output into readable tags.
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    return model.get_metrics(), model_predictions
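For reference, a hedged sketch of calling ``get_model_predictions`` with a non-shuffling ``BasicIterator``; the ``model`` and ``instances`` names are placeholders for objects built elsewhere.

from allennlp.data.iterators import BasicIterator

# Sketch only: ``model`` and ``instances`` are assumed to be built elsewhere.
iterator = BasicIterator(batch_size=32)
iterator.index_with(model.vocab)  # the iterator needs the model's vocabulary

metrics, tags = get_model_predictions(model, instances, iterator,
                                      cuda_device=-1)  # -1 runs on CPU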
Example 3
    def get_predictions(self,
                        instances: List[Instance],
                        model: Model,
                        cuda_device: int = -1,
                        prediction_file: Optional[str] = None,
                        visualization_file: Optional[str] = None,
                        verbose: bool = False) -> List[Dict]:
        """
        We use this function to get predictions.
        We use a basic iterator, since a bucket iterator shuffles the
        data, even when ``shuffle=False``.

        Arguments:
            instances (List[Instance]) : The list of instances for inference
            model (Model) : The model being used for predictions
            cuda_device (int) : The cuda device being used for processing
            prediction_file (Optional[str]) : If provided, the predictions
                are dumped to this file as JSON
            visualization_file (Optional[str]) : If provided (and
                visualization is enabled), the visualization is written here
            verbose (bool) : Log accuracies and such

        Returns:
            predictions (List[Dict]) : The predictions. Each contains the
                following keys
                * text (List[str]): The tokens
                * pred (List[Tuple[str, float]]): The predicted labels and
                    probs. Can potentially have multiple labels being
                    predicted
                * gold (List[str]): The gold labels
                    can potentially have multiple gold labels
                * pred_labels (List[str]): Predicted labels for segmentation
                    Note that this method is implemented by the base classes
                * attn (Dict[str, List[float]]) : A dictionary mapping tags to
                    attention values
                * gold_labels : The gold labels for segmentation

        Additionally, this method stores the predictions in ``prediction_file``
            and the visualization in ``visualization_file``, when those
            arguments are provided (and, for the latter, when visualization
            is enabled)
        """
        iterator = self._iterator(instances,
                                  num_epochs=1,
                                  shuffle=False,
                                  cuda_device=cuda_device,
                                  for_training=False)
        model.eval()
        num_batches = self._iterator.get_num_batches(instances)
        inference_generator_tqdm = Tqdm.tqdm(iterator, total=num_batches)
        predictions = []
        index = 0
        # Per-tag confusion counts, used to compute accuracies under verbose.
        matrix = {
            self._indexer.ix2tags[ix]: {
                "tp": 0.0,
                "fp": 0.0,
                "fn": 0.0,
                "tn": 0.0
            }
            for ix in range(len(self._indexer.ix2tags))
        }

        for batch in inference_generator_tqdm:
            # Multi-GPU data parallelism is not currently supported.
            output_dict = model.decode(model(**batch))
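            # The basic iterator preserves instance order (shuffle=False),
            # so ``index`` lines up each prediction with its instance.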
            for ix in range(len(output_dict["preds"])):
                text = self._get_text_from_instance(instances[index])
                pred = output_dict["preds"][ix]
                gold = [
                    self._indexer.get_tag(label)
                    for label in instances[index].fields['labels'].labels
                ]
                attn = output_dict["attentions"][ix]
                gold_labels = instances[index].fields['tags'].labels
                assert all(len(attn[x]) == len(text) for x in attn)
                gold_labels = self._indexer.extract_relevant(gold_labels)
                pred_labels = self.get_segmentation_from_prediction(
                    text=text, preds_probs=pred, attns=attn)
                assert len(pred_labels) == len(gold_labels) == len(text)
                gold_set = set(gold)
                # ``pred`` is a list of (label, prob) pairs; keep the labels.
                pred_set = {label for label, _ in pred}
                for tag in matrix:
                    if tag in gold_set and tag in pred_set:
                        matrix[tag]["tp"] += 1.0
                    elif tag not in gold_set and tag in pred_set:
                        matrix[tag]["fp"] += 1.0
                    elif tag in gold_set and tag not in pred_set:
                        matrix[tag]["fn"] += 1.0
                    else:
                        matrix[tag]["tn"] += 1.0
                preds = [[x[0], float(x[1])] for x in pred]
                prediction = {
                    "text": text,
                    "pred": preds,
                    "gold": gold,
                    "attn": attn,
                    "pred_labels": pred_labels,
                    "gold_labels": gold_labels
                }
                predictions.append(prediction)
                index += 1
        if prediction_file is not None and prediction_file != "":
            with open(prediction_file, "w") as f:
                json.dump(predictions, f, ensure_ascii=True, indent=4)
        if visualization_file is not None and self._visualize and \
                visualization_file != "":
            self.visualize(predictions, visualization_file)
        if verbose:
            accs = []
            for tag in matrix:
                acc = (matrix[tag]["tp"] + matrix[tag]["tn"]) / \
                    sum(matrix[tag].values()) * 100.
                logger.info(f"Tag: {tag}, Acc: {acc:.2f}")
                accs.append(acc)
            avg_acc = sum(accs) / len(accs)
            logger.info(f"Average ACC: {avg_acc:.2f}")
            p, r, f = fscore_from_preds(predictions, False)
            logger.info(f"Precision: {p:.2f}, Recall: {r:.2f}, F1: {f:.2f}")
        return predictions
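A hedged sketch of invoking this method; ``predictor`` stands for an instance of the (unnamed) class that defines ``get_predictions``, with ``_iterator``, ``_indexer``, and ``_visualize`` set up in its constructor, and the file name is a placeholder.

# Sketch only: ``predictor``, ``instances``, and ``model`` are assumed to
# exist; "predictions.json" is a placeholder path.
predictions = predictor.get_predictions(
    instances,
    model,
    cuda_device=-1,                      # run on CPU
    prediction_file="predictions.json",  # dump predictions as JSON
    verbose=True,                        # log per-tag accuracies
)
print(predictions[0]["pred_labels"])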
Example 4
import logging
from typing import Any, Dict, Iterable

import torch

from allennlp.common.checks import check_for_gpu
from allennlp.common.tqdm import Tqdm
from allennlp.data import Instance
from allennlp.data.iterators import DataIterator
from allennlp.models import Model
from allennlp.nn import util as nn_util
from allennlp.training.util import HasBeenWarned

logger = logging.getLogger(__name__)


def evaluate(
    model: Model,
    instances: Iterable[Instance],
    data_iterator: DataIterator,
    cuda_device: int,
    batch_weight_key: str,
) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            # Gather the metrics accumulated so far (without resetting them).
            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics):
                logger.warning('Metrics with names beginning with "_" will '
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = (", ".join([
                "%s: %.2f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||")
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check: either every batch produced a loss or none did.
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes " +
                    "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
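Finally, a hedged sketch of driving ``evaluate``; the dataset reader, file path, and batch size are placeholders, and an empty ``batch_weight_key`` makes every batch count with weight 1.0 (per the code above).

from allennlp.data.iterators import BasicIterator

# Sketch only: ``reader`` is an assumed DatasetReader and ``model`` an
# already-trained Model; the validation path is a placeholder.
instances = reader.read("validation_data.txt")
iterator = BasicIterator(batch_size=32)
iterator.index_with(model.vocab)

metrics = evaluate(model, instances, iterator,
                   cuda_device=-1, batch_weight_key="")
print(metrics["loss"])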