def test_pred_to_prob(caplog):
    """Check pred_to_prob against hand-written expected matrices.

    Two cases are covered: the class count matching the labels present
    (3 labels, 3 classes) and a larger class count (3 labels, 4 classes),
    where the expected output carries an extra all-zero column.
    """
    caplog.set_level(logging.INFO)

    # (n_classes, expected matrix) pairs; the input labels are always [0, 1, 2].
    cases = [
        (3, [[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]]),
        (4, [[1.0, 0, 0, 0], [0, 1.0, 0, 0], [0, 0, 1.0, 0]]),
    ]

    for n_classes, expected in cases:
        actual = pred_to_prob(np.array([0, 1, 2]), n_classes)
        assert np.array_equal(actual, np.array(expected))
def roc_auc_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """ROC AUC.

    Args:
      golds(ndarray): Ground truth values.
      probs(ndarray): Predicted probabilities; ``probs.shape[1]`` is used as
        the number of classes.
      preds(ndarray or None): Predicted values (not used by this scorer).
      uids(list, optional): Unique ids (not used by this scorer), defaults to None.

    Returns:
      dict: ROC AUC score under the key ``"roc_auc"``. The value is NaN when
      ``roc_auc_score`` raises ``ValueError``.
    """
    # Golds whose unique values are exactly {1, 2} are shifted down by one
    # before encoding (presumably to make them 0-based for pred_to_prob).
    if np.array_equal(np.unique(golds), np.array([1, 2])):
        golds = golds - 1

    gold_probs = pred_to_prob(golds, n_classes=probs.shape[1])

    try:
        roc_auc = roc_auc_score(gold_probs, probs)
    except ValueError:
        # The two literals are concatenated by the parser, so the first one
        # needs its own trailing space to keep the sentences apart.
        logger.warning(
            "Only one class present in golds. "
            "ROC AUC score is not defined in that case, set as nan instead."
        )
        roc_auc = float("nan")

    return {"roc_auc": roc_auc}
def roc_auc_scorer(golds, probs, preds, uids=None, pos_label=1):
    """ROC AUC.

    :param golds: Ground truth (correct) target values.
    :type golds: 1-d np.array
    :param probs: Predicted target probabilities.
    :type probs: k-d np.array
    :param preds: Predicted target values. (Not used!)
    :type preds: 1-d np.array
    :param uids: Unique ids. (Not used!)
    :type uids: list
    :param pos_label: The positive class label. (Not used!)
    :type pos_label: int
    :return: ROC AUC score under the key ``roc_auc``; NaN when
        ``roc_auc_score`` raises ``ValueError``.
    :rtype: dict
    """
    # The number of classes is taken from the second axis of probs.
    gold_probs = pred_to_prob(golds, n_classes=probs.shape[1])

    try:
        roc_auc = roc_auc_score(gold_probs, probs)
    except ValueError:
        # The two literals are concatenated by the parser, so the first one
        # needs its own trailing space to keep the sentences apart.
        logger.warning(
            "Only one class present in golds. "
            "ROC AUC score is not defined in that case, set as nan instead."
        )
        roc_auc = float("nan")

    return {"roc_auc": roc_auc}
def roc_auc_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """ROC AUC.

    Args:
      golds: Ground truth values.
      probs: Predicted probabilities.
      preds: Predicted values (not used by this scorer).
      uids: Unique ids (not used by this scorer), defaults to None.

    Returns:
      ROC AUC score under the key ``"roc_auc"``. The value is NaN when
      ``roc_auc_score`` raises ``ValueError``.
    """
    # Collapse (n, 1) column arrays to flat (n,) arrays.
    if len(probs.shape) == 2 and probs.shape[1] == 1:
        probs = probs.reshape(probs.shape[0])

    if len(golds.shape) == 2 and golds.shape[1] == 1:
        golds = golds.reshape(golds.shape[0])

    if len(probs.shape) > 1:
        # Multi-column probs: golds are converted with pred_to_prob so they
        # have probs.shape[1] columns as well.
        if len(golds.shape) > 1:
            golds = pred_to_prob(prob_to_pred(golds), n_classes=probs.shape[1])
        else:
            golds = pred_to_prob(golds, n_classes=probs.shape[1])
    else:
        # Flat probs: multi-column golds are reduced with prob_to_pred.
        if len(golds.shape) > 1:
            golds = prob_to_pred(golds)

    try:
        roc_auc = roc_auc_score(golds, probs)
    except ValueError:
        # The two literals are concatenated by the parser, so the first one
        # needs its own trailing space to keep the sentences apart.
        logger.warning(
            "Only one class present in golds. "
            "ROC AUC score is not defined in that case, set as nan instead."
        )
        roc_auc = float("nan")

    return {"roc_auc": roc_auc}
def __init__(
    self,
    name,
    args,
    split="train",
    transform_cls=None,
    index=None,
    k=1,
    model="bert-base-uncased",
):
    """Build the text dataset from the IMDB train or test split.

    Args:
      name: Task name; used as the uid prefix and as the key into
        TASK_NUM_CLASS.
      args: Passed to Augmentation when split is "train".
      split: "train" selects the training data; any other value selects
        the test data.
      transform_cls: Stored on the instance; replaced by an Augmentation
        object when split is "train".
      index: Optional iterable of positions into the examples list. None
        keeps every example.
      k: How many augmented samples to generate per sample; None is
        treated as 1.
      model: Forwarded to the default ToTensor transform.
    """
    X_dict, Y_dict = {"text_name": [], "text": []}, {"labels": []}

    TEXT = data.Field()
    LABEL = data.LabelField(dtype=torch.float)
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    dataset = train_data if split == "train" else test_data

    # Both paths yield (position, example) pairs so one loop can fill the
    # "text_name"/"text" columns. The indexed path used to write to
    # "image_name"/"image", which do not exist in X_dict.
    if index is None:
        selected = enumerate(dataset.examples)
    else:
        selected = ((idx, dataset.examples[idx]) for idx in index)

    for idx, sample in selected:
        x = clean_web_text(" ".join(sample.text)).split(" ")
        y = 1 if sample.label == "pos" else 0
        X_dict["text_name"].append(f"{name}_{split}_{idx}")
        X_dict["text"].append(x)
        Y_dict["labels"].append(y)

    labels = pred_to_prob(np.array(Y_dict["labels"]), TASK_NUM_CLASS[name])
    Y_dict["labels"] = torch.from_numpy(labels)

    # Statistics and vocabulary are computed over the whole split, not just
    # the examples selected by index.
    self.data_stats = get_data_stats(dataset.examples)
    self.vocab = build_vocab(dataset.examples)

    self.transform_cls = transform_cls
    if split == "train":
        # The train split always uses Augmentation, overriding transform_cls.
        self.transform_cls = Augmentation(args, self.vocab, self.data_stats)
    self.transforms = None

    self.defaults = [ToTensor(model=model)]

    # How many augmented samples to augment for each sample
    self.k = k if k is not None else 1

    super().__init__(name, X_dict=X_dict, Y_dict=Y_dict, uid="text_name")
def __init__(
    self,
    name,
    dataset,
    split="train",
    transform_cls=None,
    index=None,
    prob_label=False,
    k=1,
):
    """Wrap an indexable dataset of (image, label) pairs.

    Args:
      name: Task name; used as the uid prefix and as the key into
        TASK_NUM_CLASS and TASK_NORMALIZE.
      dataset: Source of (x, y) pairs; iterated when index is None,
        otherwise accessed by position.
      split: Only used to build the uid strings.
      transform_cls: Stored on the instance as-is.
      index: Optional iterable of positions to keep. None keeps all.
      prob_label: When true, labels are passed through pred_to_prob before
        being turned into a tensor.
      k: How many augmented samples to generate per sample; None is
        treated as 1.
    """
    X_dict = {"image_name": [], "image": []}
    Y_dict = {"labels": []}

    # Produce (position, (x, y)) pairs from either the full dataset or the
    # requested subset, then fill the columns in a single loop.
    if index is None:
        entries = enumerate(dataset)
    else:
        entries = ((pos, dataset[pos]) for pos in index)

    for pos, (x, y) in entries:
        X_dict["image_name"].append(f"{name}_{split}_{pos}")
        X_dict["image"].append(x)
        Y_dict["labels"].append(y)

    label_array = np.array(Y_dict["labels"])
    if prob_label:
        label_array = pred_to_prob(label_array, TASK_NUM_CLASS[name])
    Y_dict["labels"] = torch.from_numpy(label_array)

    self.transform_cls = transform_cls
    self.transforms = None

    to_tensor = ToTensor()
    norm_stats = TASK_NORMALIZE[name]
    self.defaults = [
        to_tensor,
        Normalize(norm_stats["mean"], norm_stats["std"]),
    ]

    # How many augmented samples to augment for each sample
    self.k = 1 if k is None else k

    super().__init__(name, X_dict=X_dict, Y_dict=Y_dict, uid="image_name")
def roc_auc_scorer(golds, probs, preds, uids=None, pos_label=1):
    """ROC AUC.

    :param golds: Ground truth (correct) target values.
    :type golds: 1-d np.array
    :param probs: Predicted target probabilities.
    :type probs: k-d np.array
    :param preds: Predicted target values. (Not used!)
    :type preds: 1-d np.array
    :param uids: Unique ids. (Not used!)
    :type uids: list
    :param pos_label: The positive class label. (Not used!)
    :type pos_label: int
    :return: ROC AUC score under the key ``roc_auc``.
    :rtype: dict
    """
    # The number of classes is taken from the second axis of probs.
    n_classes = probs.shape[1]
    encoded_golds = pred_to_prob(golds, n_classes=n_classes)
    return {"roc_auc": roc_auc_score(encoded_golds, probs)}
def __init__(
    self,
    name,
    dataset,
    labels,
    split="train",
    transform_cls=None,
    prefix="",
    prob_label=False,
    input_size=224,
    k=1,
):
    """Build an image dataset keyed by figure URL.

    Args:
      name: Dataset name forwarded to the parent constructor.
      dataset: Iterable of items; the URL is read from
        ``item[0].context.figure.url``.
      labels: Iterable of labels, paired with dataset items by position.
      split: Accepted but not used in this constructor.
      transform_cls: Stored on the instance as-is.
      prefix: String prepended to every figure URL.
      prob_label: When true, labels are passed through pred_to_prob with
        two classes before being turned into a tensor.
      input_size: Size passed to the default Resize transform.
      k: How many augmented samples to generate per sample; None is
        treated as 1.
    """
    X_dict = {"image_name": []}
    Y_dict = {"labels": []}

    # Items and labels are walked in lockstep; zip stops at the shorter one.
    for x, y in zip(dataset, labels):
        X_dict["image_name"].append(f"{prefix}{x[0].context.figure.url}")
        Y_dict["labels"].append(y)

    label_array = np.array(Y_dict["labels"])
    if prob_label:
        label_array = pred_to_prob(label_array, 2)
    Y_dict["labels"] = torch.from_numpy(label_array)

    self.transform_cls = transform_cls
    self.transforms = None

    resize = Resize(input_size)
    to_tensor = ToTensor()
    normalize = Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    self.defaults = [resize, to_tensor, normalize]

    # How many augmented samples to augment for each sample
    self.k = 1 if k is None else k

    super().__init__(name, X_dict=X_dict, Y_dict=Y_dict, uid="image_name")