Example #1
    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor, task: EdgeProbingTask):
        """ Compute loss & eval metrics.

        Expect logits and labels to be already "selected" for good targets,
        i.e. this function does not do any masking internally.

        Args:
            logits: [total_num_targets, n_classes] Tensor of float scores
            labels: [total_num_targets, n_classes] Tensor of sparse binary targets

        Returns:
            loss: scalar Tensor
        """
        binary_preds = logits.ge(0).long()  # {0,1}

        # Matthews coefficient and accuracy computed on {0,1} labels.
        task.mcc_scorer(binary_preds, labels.long())
        task.acc_scorer(binary_preds, labels.long())

        # F1Measure() expects [total_num_targets, n_classes, 2]
        # to compute binarized F1.
        binary_scores = torch.stack([-1 * logits, logits], dim=2)
        task.f1_scorer(binary_scores, labels)

        if self.loss_type == "sigmoid":
            return F.binary_cross_entropy(torch.sigmoid(logits), labels.float())
        else:
            raise ValueError("Unsupported loss type '%s' for edge probing." % self.loss_type)
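
A minimal standalone sketch of the shape conventions described in the docstring above, using dummy tensors; the task scorer calls are omitted and the variable names here are only illustrative:

import torch
import torch.nn.functional as F

total_num_targets, n_classes = 4, 3
logits = torch.randn(total_num_targets, n_classes)                    # [N, C] float scores
labels = torch.randint(0, 2, (total_num_targets, n_classes))          # [N, C] sparse binary targets

binary_preds = logits.ge(0).long()                                    # {0,1} predictions, same shape as logits
binary_scores = torch.stack([-1 * logits, logits], dim=2)             # [N, C, 2], the layout F1Measure expects
loss = F.binary_cross_entropy(torch.sigmoid(logits), labels.float())  # scalar Tensor
print(binary_scores.shape, loss.item())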
Example #2
def _write_edge_preds(
    task: EdgeProbingTask,
    preds_df: pd.DataFrame,
    pred_dir: str,
    split_name: str,
    join_with_input: bool = True,
):
    """ Write predictions for edge probing task.

    This reads the task data and joins with predictions,
    taking the 'idx' field to represent the line number in the (preprocessed)
    task data file.

    Predictions are saved as JSON with one record per line.
    """
    preds_file = os.path.join(pred_dir, f"{task.name}_{split_name}.json")
    # Each row of 'preds' is a NumPy object, need to convert to list for
    # serialization.
    preds_df = preds_df.copy()
    preds_df["preds"] = [a.tolist() for a in preds_df["preds"]]
    if join_with_input:
        preds_df.set_index(["idx"], inplace=True)
        # Load input data and join by row index.
        log.info("Task '%s': joining predictions with input split '%s'",
                 task.name, split_name)
        records = task.get_split_text(split_name)
        # TODO: update this with more prediction types, when available.
        records = (task.merge_preds(r, {"proba": preds_df.at[i, "preds"]})
                   for i, r in enumerate(records))
    else:
        records = (row.to_dict() for _, row in preds_df.iterrows())

    with open(preds_file, "w") as fd:
        for record in records:
            fd.write(json.dumps(record))
            fd.write("\n")
Example #3
    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor,
                     task: EdgeProbingTask):
        """ Compute loss & eval metrics.

        Expect logits and labels to be already "selected" for good targets,
        i.e. this function does not do any masking internally.

        Args:
            logits: [total_num_targets, n_classes] Tensor of float scores
            labels: [total_num_targets, n_classes] Tensor of sparse binary targets

        Returns:
            loss: scalar Tensor
        """
        if self.loss_type == "sigmoid":
            binary_preds = logits.ge(0).long()  # {0,1}

            # Matthews coefficient and accuracy computed on {0,1} labels.
            task.mcc_scorer(binary_preds, labels.long())
            task.acc_scorer(binary_preds, labels.long())

            # F1Measure() expects [total_num_targets, n_classes, 2]
            # to compute binarized F1.
            binary_scores = torch.stack([-1 * logits, logits], dim=2)
            task.f1_scorer(binary_scores, labels)

            loss = F.binary_cross_entropy(torch.sigmoid(logits),
                                          labels.float())
            task.xent_scorer(loss.mean().item())
            return loss

        elif self.loss_type == "softmax":
            preds = one_hot(logits.argmax(dim=-1), depth=logits.shape[-1])

            # Matthews coefficient and accuracy computed on {0,1} labels.
            task.mcc_scorer(preds.long(), labels.long())
            task.acc_scorer(preds.long(), labels.long())

            # F1Measure() expects [total_num_targets, n_classes, 2]
            # to compute binarized F1.
            binary_scores = torch.stack([-1 * logits, logits], dim=2)
            task.f1_scorer(binary_scores, labels)

            loss = F.cross_entropy(logits, labels.argmax(dim=-1))
            task.xent_scorer(loss.mean().item())
            return loss
        else:
            raise ValueError("Unsupported loss type '%s' "
                             "for edge probing." % self.loss_type)