Example #1
    def _aggregate_running_metrics(self, model):
        """Calculate the running overall and task specific metrics."""

        metric_dict = dict()

        total_count = 0
        # Log task specific loss
        for identifier in self.running_uids.keys():
            count = len(self.running_uids[identifier])
            if count > 0:
                metric_dict[identifier + "/loss"] = (
                    self.running_losses[identifier] / count
                )
            total_count += count

        # Calculate average micro loss
        if total_count > 0:
            total_loss = sum(self.running_losses.values())
            metric_dict["model/all/train/loss"] = total_loss / total_count

        micro_score_dict = defaultdict(list)
        macro_score_dict = defaultdict(list)

        # Calculate training metric
        for identifier in self.running_uids.keys():
            task_name, data_name, split = identifier.split("/")

            metric_score = model.scorers[task_name].score(
                self.running_golds[identifier],
                self.running_probs[identifier],
                prob_to_pred(self.running_probs[identifier]),
                self.running_uids[identifier],
            )
            for metric_name, metric_value in metric_score.items():
                metric_dict[f"{identifier}/{metric_name}"] = metric_value

            # Collect average score
            identifier = construct_identifier(task_name, data_name, split, "average")

            metric_dict[identifier] = np.mean(list(metric_score.values()))

            micro_score_dict[split].extend(list(metric_score.values()))
            macro_score_dict[split].append(metric_dict[identifier])

        # Collect split-wise micro/macro average score
        for split in micro_score_dict.keys():
            identifier = construct_identifier("model", "all", split, "micro_average")
            metric_dict[identifier] = np.mean(micro_score_dict[split])
            identifier = construct_identifier("model", "all", split, "macro_average")
            metric_dict[identifier] = np.mean(macro_score_dict[split])

        # Log the learning rate
        metric_dict["model/all/train/lr"] = self.optimizer.param_groups[0]["lr"]

        return metric_dict
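
The split-wise aggregation above distinguishes two kinds of averages: the micro average pools every raw metric value across tasks, while the macro average first averages within each task and then averages those task-level means. A minimal standalone sketch of that distinction, using made-up task names and scores (illustrative only, not values produced by this code):

import numpy as np
from collections import defaultdict

# Hypothetical per-task metric values for the "train" split
# (task names and numbers are illustrative only).
metric_scores = {
    "task1/data/train": {"accuracy": 0.90, "f1": 0.80},
    "task2/data/train": {"accuracy": 0.60},
}

micro_score_dict = defaultdict(list)
macro_score_dict = defaultdict(list)

for identifier, scores in metric_scores.items():
    split = identifier.split("/")[2]
    # Micro: pool every raw metric value across tasks.
    micro_score_dict[split].extend(scores.values())
    # Macro: average within each task first, then average the task means.
    macro_score_dict[split].append(np.mean(list(scores.values())))

print(np.mean(micro_score_dict["train"]))  # (0.90 + 0.80 + 0.60) / 3 ~= 0.767
print(np.mean(macro_score_dict["train"]))  # (0.85 + 0.60) / 2 = 0.725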
Example #2
    def score(self, dataloaders, return_average=True):
        """Score the data from dataloader with the model

        :param dataloaders: the dataloader that performs scoring
        :type dataloaders: dataloader
        :param return_average: Whether return average scores
        :type return_average: bool
        """

        self.eval()

        if not isinstance(dataloaders, list):
            dataloaders = [dataloaders]

        metric_score_dict = dict()

        if return_average:
            micro_score_dict = defaultdict(list)
            macro_score_dict = defaultdict(list)
            macro_loss_dict = defaultdict(list)

        for dataloader in dataloaders:
            predictions = self.predict(dataloader, return_preds=True)
            for task_name in predictions["golds"].keys():
                metric_score = self.scorers[task_name].score(
                    predictions["golds"][task_name],
                    predictions["probs"][task_name],
                    predictions["preds"][task_name],
                    predictions["uids"][task_name],
                )
                for metric_name, metric_value in metric_score.items():
                    identifier = construct_identifier(task_name,
                                                      dataloader.data_name,
                                                      dataloader.split,
                                                      metric_name)
                    metric_score_dict[identifier] = metric_value

                # Store the loss
                identifier = construct_identifier(task_name,
                                                  dataloader.data_name,
                                                  dataloader.split, "loss")
                metric_score_dict[identifier] = predictions["losses"][
                    task_name]

                if return_average:
                    # Collect average score
                    identifier = construct_identifier(task_name,
                                                      dataloader.data_name,
                                                      dataloader.split,
                                                      "average")
                    metric_score_dict[identifier] = np.mean(
                        list(metric_score.values()))

                    micro_score_dict[dataloader.split].extend(
                        list(metric_score.values()))
                    macro_score_dict[dataloader.split].append(
                        metric_score_dict[identifier])

                    # Store the loss
                    identifier = construct_identifier(task_name,
                                                      dataloader.data_name,
                                                      dataloader.split, "loss")
                    macro_loss_dict[dataloader.split].append(
                        metric_score_dict[identifier])

        if return_average:
            # Collect split-wise micro/macro average score
            for split in micro_score_dict.keys():
                identifier = construct_identifier("model", "all", split,
                                                  "micro_average")
                metric_score_dict[identifier] = np.mean(
                    micro_score_dict[split])
                identifier = construct_identifier("model", "all", split,
                                                  "macro_average")
                metric_score_dict[identifier] = np.mean(
                    macro_score_dict[split])
                identifier = construct_identifier("model", "all", split,
                                                  "loss")
                metric_score_dict[identifier] = np.mean(macro_loss_dict[split])

            # Collect overall micro/macro average score/loss
            # (flatten the per-split lists, which may have different lengths,
            # before averaging)
            identifier = construct_identifier("model", "all", "all",
                                              "micro_average")
            metric_score_dict[identifier] = np.mean(
                list(itertools.chain.from_iterable(
                    micro_score_dict.values())))
            identifier = construct_identifier("model", "all", "all",
                                              "macro_average")
            metric_score_dict[identifier] = np.mean(
                list(itertools.chain.from_iterable(
                    macro_score_dict.values())))
            identifier = construct_identifier("model", "all", "all", "loss")
            metric_score_dict[identifier] = np.mean(
                list(itertools.chain.from_iterable(
                    macro_loss_dict.values())))

        # TODO: have a better way to handle global evaluation metrics
        if Meta.config["learner_config"]["global_evaluation_metric_dict"]:
            global_evaluation_metric_dict = Meta.config["learner_config"][
                "global_evaluation_metric_dict"]
            for metric_name, metric in global_evaluation_metric_dict.items():
                metric_score_dict[metric_name] = metric(metric_score_dict)

        return metric_score_dict
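
One pitfall in the overall aggregation at the end: `micro_score_dict.values()` holds one list per split, and those lists generally have different lengths, so they must be flattened before calling `np.mean` (the other `score` variants shown here use `itertools.chain.from_iterable` for exactly this). A short standalone sketch with made-up numbers:

import itertools

import numpy as np

# Illustrative per-split score lists of unequal length.
micro_score_dict = {"train": [0.9, 0.8, 0.7], "valid": [0.6, 0.5]}

# np.mean(list(micro_score_dict.values())) would try to build a ragged
# array from lists of different lengths, which recent NumPy rejects.
# Flattening first averages all five values instead.
flat = list(itertools.chain.from_iterable(micro_score_dict.values()))
print(np.mean(flat))  # (0.9 + 0.8 + 0.7 + 0.6 + 0.5) / 5 = 0.7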
Example #3
    def score(
        self,
        dataloaders: Union[EmmentalDataLoader, List[EmmentalDataLoader]],
        return_average: bool = True,
    ) -> Dict[str, float]:
        """Score the data from dataloader.

        Args:
          dataloaders: The dataloaders to score.
          return_average: Whether to return average score.

        Returns:
          Score dict.
        """
        self.eval()

        if not isinstance(dataloaders, list):
            dataloaders = [dataloaders]

        metric_score_dict = dict()

        if return_average:
            micro_score_dict: defaultdict = defaultdict(list)
            macro_score_dict: defaultdict = defaultdict(list)
            macro_loss_dict: defaultdict = defaultdict(list)

        for dataloader in dataloaders:
            if not dataloader.is_learnable:
                logger.warning(
                    f"Dataloader {dataloader.data_name} doesn't have gold data, "
                    f"continue..."
                )
                continue

            return_probs = False
            return_preds = False
            for task_name in dataloader.task_to_label_dict:
                return_probs = return_probs or self.require_prob_for_evals[task_name]
                return_preds = return_preds or self.require_pred_for_evals[task_name]

            predictions = self.predict(
                dataloader,
                return_probs=return_probs,
                return_preds=return_preds,
                return_action_outputs=False,
            )
            for task_name in predictions["uids"].keys():
                metric_score = self.scorers[task_name].score(
                    predictions["golds"][task_name],
                    predictions["probs"][task_name] if return_probs else None,
                    predictions["preds"][task_name] if return_preds else None,
                    predictions["uids"][task_name],
                )
                for metric_name, metric_value in metric_score.items():
                    identifier = construct_identifier(
                        task_name, dataloader.data_name, dataloader.split, metric_name
                    )
                    metric_score_dict[identifier] = metric_value

                # Store the loss
                identifier = construct_identifier(
                    task_name, dataloader.data_name, dataloader.split, "loss"
                )
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    predictions["losses"][task_name]
                )

                if return_average:
                    # Collect average score
                    identifier = construct_identifier(
                        task_name, dataloader.data_name, dataloader.split, "average"
                    )
                    metric_score_dict[identifier] = np.mean(  # type: ignore
                        list(metric_score.values())
                    )

                    micro_score_dict[dataloader.split].extend(
                        list(metric_score.values())
                    )
                    macro_score_dict[dataloader.split].append(
                        metric_score_dict[identifier]
                    )

                    # Store the loss
                    identifier = construct_identifier(
                        task_name, dataloader.data_name, dataloader.split, "loss"
                    )
                    macro_loss_dict[dataloader.split].append(
                        metric_score_dict[identifier]
                    )

        if return_average:
            # Collect split-wise micro/macro average score
            for split in micro_score_dict.keys():
                identifier = construct_identifier(
                    "model", "all", split, "micro_average"
                )
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    micro_score_dict[split]
                )
                identifier = construct_identifier(
                    "model", "all", split, "macro_average"
                )
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    macro_score_dict[split]
                )
                identifier = construct_identifier("model", "all", split, "loss")
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    macro_loss_dict[split]
                )

            # Collect overall micro/macro average score/loss
            if len(micro_score_dict):
                identifier = construct_identifier(
                    "model", "all", "all", "micro_average"
                )
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    list(itertools.chain.from_iterable(micro_score_dict.values()))
                )
            if len(macro_score_dict):
                identifier = construct_identifier(
                    "model", "all", "all", "macro_average"
                )
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    list(itertools.chain.from_iterable(macro_score_dict.values()))
                )
            if len(macro_loss_dict):
                identifier = construct_identifier("model", "all", "all", "loss")
                metric_score_dict[identifier] = np.mean(  # type: ignore
                    list(itertools.chain.from_iterable(macro_loss_dict.values()))
                )

        # TODO: have a better way to handle global evaluation metrics
        if Meta.config["learner_config"]["global_evaluation_metric_dict"]:
            global_evaluation_metric_dict = Meta.config["learner_config"][
                "global_evaluation_metric_dict"
            ]
            for metric_name, metric in global_evaluation_metric_dict.items():
                metric_score_dict[metric_name] = metric(metric_score_dict)

        return metric_score_dict
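
The `global_evaluation_metric_dict` hook at the end passes the full `metric_score_dict` to each registered callable and stores whatever float it returns under the metric's name. A hypothetical example of such a callable (the key pattern and function name are illustrative, not part of the library):

from typing import Dict


def average_task_accuracy(metric_score_dict: Dict[str, float]) -> float:
    """Average every ".../accuracy" entry already present in the score dict."""
    accuracies = [
        value
        for identifier, value in metric_score_dict.items()
        if identifier.endswith("/accuracy")
    ]
    return sum(accuracies) / len(accuracies) if accuracies else 0.0

With such a function registered under, say, "model/all/test/mean_accuracy", the loop above would simply record `average_task_accuracy(metric_score_dict)` under that key.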
Example #4
def test_construct_identifier(caplog):
    """Unit test of construct_identifier."""
    caplog.set_level(logging.INFO)

    assert construct_identifier("1", "2", "3", "4") == "1/2/3/4"
    assert construct_identifier("1", "2", "3") == "1/2/3"
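
For reference, the behaviour this test pins down can be reproduced by a minimal re-implementation: the identifier is the components joined with "/", with the metric name dropped when it is omitted. This is only a sketch inferred from the two assertions above, not the library's actual source:

from typing import Optional


def construct_identifier_sketch(
    task_name: str, data_name: str, split: str, metric_name: Optional[str] = None
) -> str:
    """Join the components with '/', dropping the metric name when omitted."""
    parts = [task_name, data_name, split]
    if metric_name is not None:
        parts.append(metric_name)
    return "/".join(parts)


assert construct_identifier_sketch("1", "2", "3", "4") == "1/2/3/4"
assert construct_identifier_sketch("1", "2", "3") == "1/2/3"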
Example #5
    def _aggregate_running_metrics(
            self,
            model: EmmentalModel,
            calc_running_scores: bool = False) -> Dict[str, float]:
        """Calculate the running overall and task specific metrics.

        Args:
          model: The model to evaluate.
          calc_running_scores: Whether to calculate running scores.

        Returns:
          The score dict.
        """
        metric_dict: Dict[str, float] = dict()

        total_count = 0
        # Log task specific loss
        for identifier in self.running_uids.keys():
            count = len(self.running_uids[identifier])
            if count > 0:
                metric_dict[identifier + "/loss"] = float(
                    self.running_losses[identifier] / count)
            total_count += count

        # Calculate average micro loss
        if total_count > 0:
            total_loss = sum(self.running_losses.values())
            metric_dict["model/all/train/loss"] = float(total_loss /
                                                        total_count)

        if calc_running_scores:
            micro_score_dict: Dict[str, List[float]] = defaultdict(list)
            macro_score_dict: Dict[str, List[float]] = defaultdict(list)

            # Calculate training metric
            for identifier in self.running_uids.keys():
                task_name, data_name, split = identifier.split("/")

                if (model.scorers[task_name] and self.running_golds[identifier]
                        and self.running_probs[identifier]):
                    metric_score = model.scorers[task_name].score(
                        self.running_golds[identifier],
                        self.running_probs[identifier],
                        prob_to_pred(self.running_probs[identifier]),
                        self.running_uids[identifier],
                    )
                    for metric_name, metric_value in metric_score.items():
                        metric_dict[
                            f"{identifier}/{metric_name}"] = metric_value

                    # Collect average score
                    identifier = construct_identifier(task_name, data_name,
                                                      split, "average")
                    metric_dict[identifier] = np.mean(
                        list(metric_score.values()))
                    micro_score_dict[split].extend(
                        list(metric_score.values())  # type: ignore
                    )
                    macro_score_dict[split].append(metric_dict[identifier])


            # Collect split-wise micro/macro average score
            # (once, after all running identifiers have been processed)
            for split in micro_score_dict.keys():
                identifier = construct_identifier("model", "all", split,
                                                  "micro_average")
                metric_dict[identifier] = np.mean(
                    micro_score_dict[split]  # type: ignore
                )
                identifier = construct_identifier("model", "all", split,
                                                  "macro_average")
                metric_dict[identifier] = np.mean(
                    macro_score_dict[split]  # type: ignore
                )

        # Log the learning rate
        metric_dict["model/all/train/lr"] = self.optimizer.param_groups[0][
            "lr"]

        return metric_dict
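
The `prob_to_pred` helper used when scoring the running metrics converts per-class probabilities into hard predictions. A minimal sketch of such a conversion, assuming (as is typical) that it amounts to an argmax over the class dimension; the real helper may handle additional cases:

import numpy as np


def prob_to_pred_sketch(probs: np.ndarray) -> np.ndarray:
    """Pick the highest-probability class for each example."""
    return np.argmax(probs, axis=-1)


probs = np.array([[0.1, 0.9], [0.7, 0.3]])
print(prob_to_pred_sketch(probs))  # [1 0]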
Example #6
    def score(
        self,
        dataloaders: Union[EmmentalDataLoader, List[EmmentalDataLoader]],
        return_average: bool = True,
    ) -> Dict[str, float]:
        """Score the data from dataloader.

        Args:
          dataloaders(EmmentalDataLoader or List[EmmentalDataLoader]): The dataloaders
            to score.
          return_average(bool): Whether to return average score.

        Returns:
          dict: Score dict.

        """

        self.eval()

        if not isinstance(dataloaders, list):
            dataloaders = [dataloaders]

        metric_score_dict = dict()

        if return_average:
            micro_score_dict: defaultdict = defaultdict(list)
            macro_score_dict: defaultdict = defaultdict(list)
            macro_loss_dict: defaultdict = defaultdict(list)

        for dataloader in dataloaders:
            predictions = self.predict(dataloader, return_preds=True)
            for task_name in predictions["golds"].keys():
                metric_score = self.scorers[task_name].score(
                    predictions["golds"][task_name],
                    predictions["probs"][task_name],
                    predictions["preds"][task_name],
                    predictions["uids"][task_name],
                )
                for metric_name, metric_value in metric_score.items():
                    identifier = construct_identifier(
                        task_name, dataloader.data_name, dataloader.split, metric_name
                    )
                    metric_score_dict[identifier] = metric_value

                # Store the loss
                identifier = construct_identifier(
                    task_name, dataloader.data_name, dataloader.split, "loss"
                )
                metric_score_dict[identifier] = np.mean(
                    predictions["losses"][task_name]
                )

                if return_average:
                    # Collect average score
                    identifier = construct_identifier(
                        task_name, dataloader.data_name, dataloader.split, "average"
                    )
                    metric_score_dict[identifier] = np.mean(list(metric_score.values()))

                    micro_score_dict[dataloader.split].extend(
                        list(metric_score.values())
                    )
                    macro_score_dict[dataloader.split].append(
                        metric_score_dict[identifier]
                    )

                    # Store the loss
                    identifier = construct_identifier(
                        task_name, dataloader.data_name, dataloader.split, "loss"
                    )
                    macro_loss_dict[dataloader.split].append(
                        metric_score_dict[identifier]
                    )

        if return_average:
            # Collect split-wise micro/macro average score
            for split in micro_score_dict.keys():
                identifier = construct_identifier(
                    "model", "all", split, "micro_average"
                )
                metric_score_dict[identifier] = np.mean(micro_score_dict[split])
                identifier = construct_identifier(
                    "model", "all", split, "macro_average"
                )
                metric_score_dict[identifier] = np.mean(macro_score_dict[split])
                identifier = construct_identifier("model", "all", split, "loss")
                metric_score_dict[identifier] = np.mean(macro_loss_dict[split])

            # Collect overall micro/macro average score/loss
            identifier = construct_identifier("model", "all", "all", "micro_average")
            metric_score_dict[identifier] = np.mean(
                list(itertools.chain.from_iterable(micro_score_dict.values()))
            )
            identifier = construct_identifier("model", "all", "all", "macro_average")
            metric_score_dict[identifier] = np.mean(
                list(itertools.chain.from_iterable(macro_score_dict.values()))
            )
            identifier = construct_identifier("model", "all", "all", "loss")
            metric_score_dict[identifier] = np.mean(
                list(itertools.chain.from_iterable(macro_loss_dict.values()))
            )

        # TODO: have a better way to handle global evaluation metrics
        if Meta.config["learner_config"]["global_evaluation_metric_dict"]:
            global_evaluation_metric_dict = Meta.config["learner_config"][
                "global_evaluation_metric_dict"
            ]
            for metric_name, metric in global_evaluation_metric_dict.items():
                metric_score_dict[metric_name] = metric(metric_score_dict)

        return metric_score_dict
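
Because every key returned by `score` follows the `task/data/split/metric` layout produced by `construct_identifier`, downstream code can slice the result with plain dictionary operations. An illustrative snippet (the keys and values are made up, not real output):

# Hypothetical output of model.score(...) with made-up values.
metric_score_dict = {
    "task1/dataset/test/accuracy": 0.91,
    "task1/dataset/test/loss": 0.35,
    "model/all/test/micro_average": 0.88,
}

# Keep only the test-split metrics for a single task.
task1_test = {
    identifier: value
    for identifier, value in metric_score_dict.items()
    if identifier.startswith("task1/") and identifier.split("/")[2] == "test"
}
print(task1_test)
# {'task1/dataset/test/accuracy': 0.91, 'task1/dataset/test/loss': 0.35}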