Exemple #1
0
def test_pred_to_prob(caplog):
    """Check pred_to_prob against hand-written expected matrices."""
    caplog.set_level(logging.INFO)

    # (number of classes, expected output for the class ids [0, 1, 2])
    cases = (
        (3, [[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]]),
        (4, [[1.0, 0, 0, 0], [0, 1.0, 0, 0], [0, 0, 1.0, 0]]),
    )

    for n_classes, expected_rows in cases:
        # A fresh input array per case keeps the cases independent.
        actual = pred_to_prob(np.array([0, 1, 2]), n_classes)
        assert np.array_equal(actual, np.array(expected_rows))
Exemple #2
0
def roc_auc_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """ROC AUC.

    Args:
      golds(ndarray): Ground truth values. If the distinct values are exactly
        1 and 2 they are shifted down by one before scoring.
      probs(ndarray): Predicted probabilities. ``probs.shape[1]`` is read as
        the number of classes, so this must have at least two dimensions.
      preds(ndarray or None): Predicted values (not used by this scorer).
      uids(list, optional): Unique ids (not used by this scorer), defaults to
        None.

    Returns:
      dict: ``{"roc_auc": score}``. The score is ``nan`` when
      ``roc_auc_score`` raises ``ValueError``.

    """
    # Labels given as {1, 2} are re-based to {0, 1}.
    if np.array_equal(np.unique(golds), np.array([1, 2])):
        golds = golds - 1

    gold_probs = pred_to_prob(golds, n_classes=probs.shape[1])

    try:
        roc_auc = roc_auc_score(gold_probs, probs)
    except ValueError:
        # The trailing space keeps the two sentences from running together
        # once the adjacent literals are concatenated.
        logger.warning(
            "Only one class present in golds. "
            "ROC AUC score is not defined in that case, set as nan instead.")
        roc_auc = float("nan")

    return {"roc_auc": roc_auc}
Exemple #3
0
def roc_auc_scorer(golds, probs, preds, uids=None, pos_label=1):
    """ROC AUC.

    :param golds: Ground truth (correct) target values.
    :type golds: 1-d np.array
    :param probs: Predicted target probabilities. ``probs.shape[1]`` is read
        as the number of classes.
    :type probs: k-d np.array
    :param preds: Predicted target values. (Not used!)
    :type preds: 1-d np.array
    :param uids: Unique ids. (Not used!)
    :type uids: list
    :param pos_label: The positive class label. (Not used!)
    :type pos_label: int
    :return: ``{"roc_auc": score}``; the score is ``nan`` when
        ``roc_auc_score`` raises ``ValueError``.
    :rtype: dict
    """

    gold_probs = pred_to_prob(golds, n_classes=probs.shape[1])

    try:
        roc_auc = roc_auc_score(gold_probs, probs)
    except ValueError:
        # The trailing space keeps the two sentences from running together
        # once the adjacent literals are concatenated.
        logger.warning(
            "Only one class present in golds. "
            "ROC AUC score is not defined in that case, set as nan instead.")
        roc_auc = float("nan")

    return {"roc_auc": roc_auc}
Exemple #4
0
def roc_auc_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """ROC AUC.

    Args:
      golds: Ground truth values. May be 1-d, a single-column 2-d array, or a
        multi-column 2-d array.
      probs: Predicted probabilities. May be 1-d, a single-column 2-d array,
        or a multi-column array whose second dimension is read as the number
        of classes.
      preds: Predicted values (not used by this scorer).
      uids: Unique ids (not used by this scorer), defaults to None.

    Returns:
      ``{"roc_auc": score}``. The score is ``nan`` when ``roc_auc_score``
      raises ``ValueError``.
    """
    # Flatten single-column 2-d inputs to 1-d.
    if probs.ndim == 2 and probs.shape[1] == 1:
        probs = probs.reshape(probs.shape[0])

    if golds.ndim == 2 and golds.shape[1] == 1:
        golds = golds.reshape(golds.shape[0])

    if probs.ndim > 1:
        # Multi-column probs: bring golds to a matrix with probs.shape[1]
        # columns via pred_to_prob.
        if golds.ndim > 1:
            # NOTE(review): presumably this re-normalizes multi-column golds
            # by round-tripping through prob_to_pred; confirm against helper.
            golds = pred_to_prob(prob_to_pred(golds), n_classes=probs.shape[1])
        else:
            golds = pred_to_prob(golds, n_classes=probs.shape[1])
    else:
        # 1-d probs: multi-column golds are reduced with prob_to_pred.
        if golds.ndim > 1:
            golds = prob_to_pred(golds)

    try:
        roc_auc = roc_auc_score(golds, probs)
    except ValueError:
        # The trailing space keeps the two sentences from running together
        # once the adjacent literals are concatenated.
        logger.warning(
            "Only one class present in golds. "
            "ROC AUC score is not defined in that case, set as nan instead.")
        roc_auc = float("nan")

    return {"roc_auc": roc_auc}
Exemple #5
0
    def __init__(
        self,
        name,
        args,
        split="train",
        transform_cls=None,
        index=None,
        k=1,
        model="bert-base-uncased",
    ):
        """Build the IMDB text dataset for one split.

        Args:
          name: Task name. Used as the uid prefix, as the key into
            ``TASK_NUM_CLASS``, and passed to the parent constructor.
          args: Passed to ``Augmentation`` when ``split == "train"``.
          split: ``"train"`` selects the training data; any other value
            selects the test data.
          transform_cls: Stored on the instance; replaced by an
            ``Augmentation`` instance when ``split == "train"``.
          index: Optional iterable of positions into the split's examples.
            ``None`` keeps every example.
          k: How many augmented samples to produce per sample; ``None`` is
            treated as 1.
          model: Passed to ``ToTensor``.
        """
        X_dict, Y_dict = {"text_name": [], "text": []}, {"labels": []}

        TEXT = data.Field()
        LABEL = data.LabelField(dtype=torch.float)
        train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
        dataset = train_data if split == "train" else test_data

        examples = dataset.examples
        # A single loop serves both cases. The indexed case previously wrote
        # to "image_name"/"image" keys that do not exist in X_dict, so it
        # raised KeyError.
        positions = range(len(examples)) if index is None else index
        for pos in positions:
            sample = examples[pos]
            x = clean_web_text(" ".join(sample.text)).split(" ")
            y = 1 if sample.label == "pos" else 0
            X_dict["text_name"].append(f"{name}_{split}_{pos}")
            X_dict["text"].append(x)
            Y_dict["labels"].append(y)

        labels = pred_to_prob(np.array(Y_dict["labels"]), TASK_NUM_CLASS[name])

        Y_dict["labels"] = torch.from_numpy(labels)

        # Statistics and vocabulary are computed over the whole split, not
        # just the positions selected by ``index``.
        self.data_stats = get_data_stats(dataset.examples)
        self.vocab = build_vocab(dataset.examples)

        self.transform_cls = transform_cls
        if split == "train":
            # The train split always uses Augmentation, overriding the
            # caller-supplied transform_cls.
            self.transform_cls = Augmentation(args, self.vocab,
                                              self.data_stats)
        self.transforms = None

        self.defaults = [ToTensor(model=model)]

        # How many augmented samples to augment for each sample
        self.k = k if k is not None else 1

        super().__init__(name, X_dict=X_dict, Y_dict=Y_dict, uid="text_name")
Exemple #6
0
    def __init__(
        self,
        name,
        dataset,
        split="train",
        transform_cls=None,
        index=None,
        prob_label=False,
        k=1,
    ):
        """Wrap an (image, label) dataset for one split.

        Args:
          name: Task name. Used as the uid prefix, as the key into
            ``TASK_NUM_CLASS`` and ``TASK_NORMALIZE``, and passed to the
            parent constructor.
          dataset: Source of ``(x, y)`` pairs; iterated when ``index`` is
            ``None``, otherwise subscripted with each entry of ``index``.
          split: Split name, embedded in each sample's uid.
          transform_cls: Stored on the instance as ``transform_cls``.
          index: Optional iterable of positions to keep.
          prob_label: When true, labels are passed through ``pred_to_prob``.
          k: How many augmented samples to produce per sample; ``None`` is
            treated as 1.
        """
        names, images, targets = [], [], []

        if index is not None:
            # Keep only the requested positions.
            for pos in index:
                image, target = dataset[pos]
                names.append(f"{name}_{split}_{pos}")
                images.append(image)
                targets.append(target)
        else:
            for pos, (image, target) in enumerate(dataset):
                names.append(f"{name}_{split}_{pos}")
                images.append(image)
                targets.append(target)

        target_array = np.array(targets)
        if prob_label:
            target_array = pred_to_prob(target_array, TASK_NUM_CLASS[name])

        X_dict = {"image_name": names, "image": images}
        Y_dict = {"labels": torch.from_numpy(target_array)}

        self.transform_cls = transform_cls
        self.transforms = None

        # Transforms that are always applied: tensor conversion followed by
        # the task-specific normalization.
        norm_cfg = TASK_NORMALIZE[name]
        self.defaults = [
            ToTensor(),
            Normalize(norm_cfg["mean"], norm_cfg["std"]),
        ]

        # How many augmented samples to augment for each sample
        self.k = 1 if k is None else k

        super().__init__(name, X_dict=X_dict, Y_dict=Y_dict, uid="image_name")
Exemple #7
0
def roc_auc_scorer(golds, probs, preds, uids=None, pos_label=1):
    """ROC AUC.

    :param golds: Ground truth (correct) target values.
    :type golds: 1-d np.array
    :param probs: Predicted target probabilities. ``probs.shape[1]`` is read
        as the number of classes.
    :type probs: k-d np.array
    :param preds: Predicted target values. (Not used!)
    :type preds: 1-d np.array
    :param uids: Unique ids. (Not used!)
    :type uids: list
    :param pos_label: The positive class label. (Not used!)
    :type pos_label: int
    :return: ``{"roc_auc": score}``.
    :rtype: dict
    """

    n_classes = probs.shape[1]
    gold_matrix = pred_to_prob(golds, n_classes=n_classes)

    return {"roc_auc": roc_auc_score(gold_matrix, probs)}
Exemple #8
0
    def __init__(
        self,
        name,
        dataset,
        labels,
        split="train",
        transform_cls=None,
        prefix="",
        prob_label=False,
        input_size=224,
        k=1,
    ):
        """Pair each dataset entry's figure URL with its label.

        Args:
          name: Dataset name, passed to the parent constructor.
          dataset: Iterable of entries; ``entry[0].context.figure.url`` is
            read from each.
          labels: Iterable of labels, consumed in lockstep with ``dataset``.
          split: Accepted but not read in this constructor.
          transform_cls: Stored on the instance as ``transform_cls``.
          prefix: String prepended to every figure URL to form the uid.
          prob_label: When true, labels are passed through ``pred_to_prob``
            with two classes.
          input_size: Passed to ``Resize``.
          k: How many augmented samples to produce per sample; ``None`` is
            treated as 1.
        """
        image_names, targets = [], []
        for entry, target in zip(dataset, labels):
            image_names.append(f"{prefix}{entry[0].context.figure.url}")
            targets.append(target)

        target_array = np.array(targets)
        if prob_label:
            target_array = pred_to_prob(target_array, 2)

        X_dict = {"image_name": image_names}
        Y_dict = {"labels": torch.from_numpy(target_array)}

        self.transform_cls = transform_cls
        self.transforms = None

        # Transforms that are always applied: resize, tensor conversion, then
        # normalization with fixed per-channel mean and std.
        channel_mean = (0.485, 0.456, 0.406)
        channel_std = (0.229, 0.224, 0.225)
        self.defaults = [
            Resize(input_size),
            ToTensor(),
            Normalize(channel_mean, channel_std),
        ]

        # How many augmented samples to augment for each sample
        self.k = 1 if k is None else k

        super().__init__(name, X_dict=X_dict, Y_dict=Y_dict, uid="image_name")