Example #1
import logging

import numpy as np
import torch
from emmental.utils.utils import array_to_numpy  # import path assumed


def test_array_to_numpy(caplog):
    """Unit test of array_to_numpy."""
    caplog.set_level(logging.INFO)

    assert (np.array_equal(array_to_numpy([[1, 2], [3, 4]]),
                           np.array([[1, 2], [3, 4]])) is True)
    assert (np.array_equal(array_to_numpy(torch.tensor([[1, 2], [3, 4]])),
                           np.array([[1, 2], [3, 4]])) is True)
    assert np.array_equal(array_to_numpy([[1, 2], [3, 4]], flatten=True),
                          np.array([1, 2, 3, 4]))
Example #2
import logging

import numpy as np
import pytest
import torch
from emmental.utils.utils import array_to_numpy  # import path assumed


def test_array_to_numpy(caplog):
    """Unit test of array_to_numpy."""
    caplog.set_level(logging.INFO)

    assert (np.array_equal(array_to_numpy([[1, 2], [3, 4]]),
                           np.array([[1, 2], [3, 4]])) is True)
    assert (np.array_equal(array_to_numpy(torch.tensor([[1, 2], [3, 4]])),
                           np.array([[1, 2], [3, 4]])) is True)
    assert np.array_equal(array_to_numpy([[1, 2], [3, 4]], flatten=True),
                          np.array([1, 2, 3, 4]))

    with pytest.raises(ValueError):
        array_to_numpy(1.23)
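
Both tests above exercise array_to_numpy from Emmental's utilities. For reference, here is a minimal sketch of such a converter, inferred purely from the assertions above; the name array_to_numpy_sketch and the exact branching are assumptions, not the library's actual implementation.

from typing import Any

import numpy as np
import torch


def array_to_numpy_sketch(array: Any, flatten: bool = False) -> np.ndarray:
    """Convert a list, np.ndarray, or torch.Tensor to a numpy array (sketch)."""
    if isinstance(array, np.ndarray):
        converted = array
    elif isinstance(array, list):
        converted = np.array(array)
    elif isinstance(array, torch.Tensor):
        # Detach from the autograd graph and move to CPU before converting.
        converted = array.detach().cpu().numpy()
    else:
        # Scalars such as 1.23 are rejected, matching the pytest.raises check.
        raise ValueError(f"Cannot convert type {type(array)} to ndarray")
    if flatten:
        converted = converted.reshape(-1)  # 1-D output, as the flatten test expects
    return converted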
Example #3
    def score(
        self,
        golds: Union[ndarray, List[ndarray]],
        preds: Union[ndarray, List[ndarray]],
        probs: Union[ndarray, List[ndarray]],
        uids: List[str] = None,
    ) -> Dict[str, float]:
        """Calculate the score.

        Args:
          golds: Ground truth values.
          preds: Predicted values.
          probs: Predicted probabilities.
          uids: Unique ids, defaults to None.

        Returns:
          Score dict.
        """
        metric_dict = dict()

        for metric_name, metric in self.metrics.items():
            # handle no examples
            if len(golds) == 0:
                metric_dict[metric_name] = float("nan")
                continue
            try:
                golds = array_to_numpy(golds)
            except ValueError:
                pass
            try:
                probs = array_to_numpy(probs) if probs is not None else probs
            except ValueError:
                pass
            try:
                preds = array_to_numpy(preds) if preds is not None else preds
            except ValueError:
                pass
            res = metric(golds, preds, probs, uids)

            if isinstance(res, dict):
                metric_dict.update(res)
            else:
                metric_dict[metric_name] = res

        return metric_dict
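
Each entry in self.metrics is expected to be a callable with the signature (golds, preds, probs, uids) that returns either a float or a dict of named scores. A hypothetical metric compatible with this loop might look like the following; accuracy here is an illustration, not a function shipped by the library.

import numpy as np


def accuracy(golds, preds, probs, uids=None):
    # Fraction of predictions matching the gold labels; probs and uids unused.
    return float(np.mean(golds == preds))


golds = np.array([0, 1, 1, 0])
preds = np.array([0, 1, 0, 0])
print(accuracy(golds, preds, probs=None))  # 0.75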
Example #4
def collect_result(uid_d, gold_d, pred_d, prob_d, out_d,
                   cur_sentidx_nummen):
    """Merge results for the sentences where all mentions have been
    evaluated."""
    final_uid_d = defaultdict(list)
    final_prob_d = defaultdict(list)
    final_pred_d = defaultdict(list)
    final_gold_d = defaultdict(list)
    final_out_d = defaultdict(lambda: defaultdict(list))
    sentidxs_finalized = []
    # print("FINALIZE", cur_sentidx_nummen, [sent_idx2num_mens[str(k)] for k in cur_sentidx_nummen])
    for sent_idx, cur_mention_set in cur_sentidx_nummen.items():
        assert (
            len(cur_mention_set) <= sent_idx2num_mens[str(sent_idx)]
        ), f"Too many mentions for {sent_idx}: {cur_mention_set} VS {sent_idx2num_mens[str(sent_idx)]}"
        if len(cur_mention_set) == sent_idx2num_mens[str(sent_idx)]:
            sentidxs_finalized.append(sent_idx)
            for task_name in uid_d:
                final_uid_d[task_name].extend(uid_d[task_name][sent_idx])
                final_prob_d[task_name].extend(prob_d[task_name][sent_idx])
                final_pred_d[task_name].extend(pred_d[task_name][sent_idx])
                final_gold_d[task_name].extend(gold_d[task_name][sent_idx])
                if task_name in out_d.keys():
                    for action_name in out_d[task_name].keys():
                        final_out_d[task_name][action_name].extend(
                            out_d[task_name][action_name][sent_idx])
    # If the batch size and the accumulation step are both close to 1,
    # we may end up with no complete sentences at all
    if len(sentidxs_finalized) == 0:
        return {}, sentidxs_finalized
    res = {
        "uids": final_uid_d,
        "golds": final_gold_d,
    }
    for task_name in final_prob_d.keys():
        final_prob_d[task_name] = array_to_numpy(final_prob_d[task_name])
    res["probs"] = final_prob_d
    for task_name in final_pred_d.keys():
        final_pred_d[task_name] = array_to_numpy(final_pred_d[task_name])
    res["preds"] = final_pred_d
    res["outputs"] = final_out_d
    return res, sentidxs_finalized
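
For orientation, the dict returned by collect_result mirrors the one built by predict in Example #7: plain lists for uids and golds, numpy arrays (after array_to_numpy) for probs and preds. The task name "ner" and the uid strings below are purely illustrative.

import numpy as np

res = {
    "uids": {"ner": ["sent0_men0", "sent0_men1"]},
    "golds": {"ner": [3, 7]},
    "probs": {"ner": np.array([[0.1, 0.9], [0.8, 0.2]])},
    "preds": {"ner": np.array([1, 0])},
    "outputs": {"ner": {"action_name": [None, None]}},
}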
Example #5
    def score(self, golds, preds, probs, uids=None):
        metric_dict = dict()

        for metric_name, metric in self.metrics.items():
            # handle no examples
            if len(golds) == 0:
                metric_dict[metric_name] = float("nan")
                continue

            golds = array_to_numpy(golds)
            preds = array_to_numpy(preds)
            probs = array_to_numpy(probs)

            res = metric(golds, preds, probs, uids)

            if isinstance(res, dict):
                metric_dict.update(res)
            else:
                metric_dict[metric_name] = res

        return metric_dict
Example #6
    def score(self,
              golds: ndarray,
              preds: ndarray,
              probs: ndarray,
              uids: List[str] = None) -> Dict[str, float]:
        """Calculate the score.

        Args:
          golds(ndarray): Ground truth values.
          preds(ndarray): Predicted values.
          probs(ndarray): Predicted probabilities.
          uids(list, optional): Unique ids, defaults to None.

        Returns:
          dict: score dict.

        """

        metric_dict = dict()

        for metric_name, metric in self.metrics.items():
            # handle no examples
            if len(golds) == 0:
                metric_dict[metric_name] = float("nan")
                continue

            golds = array_to_numpy(golds)
            preds = array_to_numpy(preds)
            probs = array_to_numpy(probs)

            res = metric(golds, preds, probs, uids)

            if isinstance(res, dict):
                metric_dict.update(res)
            else:
                metric_dict[metric_name] = res

        return metric_dict
Example #7
    def predict(
        self,
        dataloader: EmmentalDataLoader,
        return_probs: bool = True,
        return_preds: bool = False,
        return_action_outputs: bool = False,
    ) -> Dict[str, Any]:
        """Predict from dataloader.

        Args:
          dataloader: The dataloader to predict on.
          return_probs: Whether to return probs or not, defaults to True.
          return_preds: Whether to return preds or not, defaults to False.
          return_action_outputs: Whether to return action_outputs or not,
            defaults to False.

        Returns:
          The result dict.
        """
        self.eval()

        uid_dict: Dict[str, List[str]] = defaultdict(list)
        prob_dict: Dict[str, Union[ndarray, List[ndarray]]] = defaultdict(list)
        pred_dict: Dict[str, Union[ndarray, List[ndarray]]] = defaultdict(list)
        gold_dict: Dict[str, List[Union[ndarray, int, float]]] = defaultdict(list)
        out_dict: Dict[str, Dict[str, List[Union[ndarray, int, float]]]] = defaultdict(
            lambda: defaultdict(list)
        )
        loss_dict: Dict[str, Union[ndarray, float]] = defaultdict(list)  # type: ignore

        if not dataloader.is_learnable:
            gold_dict = None
            loss_dict = None

        # Collect dataloader information
        task_to_label_dict = dataloader.task_to_label_dict
        uid = dataloader.uid

        with torch.no_grad():
            for batch_num, bdict in tqdm(
                enumerate(dataloader),
                total=len(dataloader),
                desc=f"Evaluating {dataloader.data_name} ({dataloader.split})",
            ):
                if isinstance(bdict, dict):
                    X_bdict = bdict
                    Y_bdict = None
                else:
                    X_bdict, Y_bdict = bdict
                    if not dataloader.is_learnable:
                        Y_bdict = None

                if return_action_outputs:
                    (
                        uid_bdict,
                        loss_bdict,
                        prob_bdict,
                        gold_bdict,
                        out_bdict,
                    ) = self.forward(  # type: ignore
                        X_bdict[uid],
                        X_bdict,
                        Y_bdict,
                        task_to_label_dict,
                        return_action_outputs=return_action_outputs,
                        return_probs=return_probs or return_preds,
                    )
                else:
                    (
                        uid_bdict,
                        loss_bdict,
                        prob_bdict,
                        gold_bdict,
                    ) = self.forward(  # type: ignore
                        X_bdict[uid],
                        X_bdict,
                        Y_bdict,
                        task_to_label_dict,
                        return_action_outputs=return_action_outputs,
                        return_probs=return_probs or return_preds,
                    )
                    out_bdict = None
                for task_name in uid_bdict.keys():
                    uid_dict[task_name].extend(uid_bdict[task_name])
                    if return_probs:
                        prob_dict[task_name].extend(  # type: ignore
                            prob_bdict[task_name]
                        )
                    if return_preds:
                        pred_dict[task_name].extend(  # type: ignore
                            prob_to_pred(prob_bdict[task_name])
                        )
                    if dataloader.is_learnable:
                        gold_dict[task_name].extend(gold_bdict[task_name])
                        if len(loss_bdict[task_name].size()) == 0:
                            if loss_dict[task_name] == []:
                                loss_dict[task_name] = 0
                            loss_dict[task_name] += loss_bdict[task_name].item() * len(
                                uid_bdict[task_name]
                            )
                        else:
                            loss_dict[task_name].extend(  # type: ignore
                                loss_bdict[task_name].cpu().numpy()
                            )
                if return_action_outputs and out_bdict:
                    for task_name in out_bdict.keys():
                        for action_name in out_bdict[task_name].keys():
                            out_dict[task_name][action_name].extend(
                                out_bdict[task_name][action_name]
                            )

        # Calculate average loss
        if dataloader.is_learnable:
            for task_name in uid_dict.keys():
                if not isinstance(loss_dict[task_name], list):
                    loss_dict[task_name] /= len(uid_dict[task_name])

        res = {
            "uids": uid_dict,
            "golds": gold_dict,
            "losses": loss_dict,
        }

        if return_probs:
            for task_name in prob_dict.keys():
                prob_dict[task_name] = array_to_numpy(prob_dict[task_name])
            res["probs"] = prob_dict

        if return_preds:
            for task_name in pred_dict.keys():
                pred_dict[task_name] = array_to_numpy(pred_dict[task_name])
            res["preds"] = pred_dict

        if return_action_outputs:
            res["outputs"] = out_dict

        return res
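
A sketch of how this return value might be consumed; model and test_dataloader are placeholders for an already-built EmmentalModel and EmmentalDataLoader, assumed to exist rather than defined here.

res = model.predict(test_dataloader, return_probs=True, return_preds=True)

for task_name in res["preds"]:
    # After the array_to_numpy conversion above, probs and preds are ndarrays.
    print(task_name, res["preds"][task_name].shape, res["probs"][task_name].shape)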