def test_f1(caplog):
    """Unit test of f1_scorer"""
    caplog.set_level(logging.INFO)

    golds = np.array([0, 1, 0, 1, 0, 1])
    preds = np.array([0, 0, 0, 0, 0, 1])

    metric_dict = f1_scorer(golds, None, preds, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(golds, None, preds, pos_label=0)
    assert isequal(metric_dict, {"f1": 0.7499999999999999})
def accuracy_f1_scorer(
    golds: ndarray,
    probs: Optional[ndarray],
    preds: ndarray,
    uids: Optional[List[str]] = None,
    pos_label: int = 1,
) -> Dict[str, float]:
    """Average of accuracy and f1 score.

    Args:
      golds: Ground truth values.
      probs: Predicted probabilities.
      preds: Predicted values.
      uids: Unique ids, defaults to None.
      pos_label: The positive class label, defaults to 1.

    Returns:
      Average of accuracy and f1.
    """
    metrics = dict()
    accuracy = accuracy_scorer(golds, probs, preds, uids)
    f1 = f1_scorer(golds, probs, preds, uids, pos_label=pos_label)
    metrics["accuracy_f1"] = mean([accuracy["accuracy"], f1["f1"]])
    return metrics
def accuracy_f1_scorer(
    golds: ndarray,
    probs: Optional[ndarray],
    preds: ndarray,
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    r"""Average of accuracy and f1 score.

    Args:
      golds(ndarray): Ground truth values.
      probs(ndarray or None): Predicted probabilities.
      preds(ndarray): Predicted values.
      uids(list, optional): Unique ids, defaults to None.

    Returns:
      dict: Average of accuracy and f1.
    """
    metrics = dict()
    accuracy = accuracy_scorer(golds, probs, preds, uids)
    f1 = f1_scorer(golds, probs, preds, uids)
    metrics["accuracy_f1"] = np.mean([accuracy["accuracy"], f1["f1"]])
    return metrics
def test_f1(caplog):
    """Unit test of f1_scorer"""
    caplog.set_level(logging.INFO)

    golds = np.array([0, 1, 0, 1, 0, 1])
    gold_probs = np.array(
        [[0.6, 0.4], [0.1, 0.9], [0.7, 0.3], [0.2, 0.8], [0.9, 0.1], [0.4, 0.6]]
    )
    preds = np.array([0, 0, 0, 0, 0, 1])

    metric_dict = f1_scorer(golds, None, preds, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(golds, None, preds, pos_label=0)
    assert isequal(metric_dict, {"f1": 0.7499999999999999})

    metric_dict = f1_scorer(gold_probs, None, preds, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(gold_probs, None, preds, pos_label=0)
    assert isequal(metric_dict, {"f1": 0.7499999999999999})
def accuracy_f1_scorer(golds, probs, preds, uids=None):
    """Average of accuracy and f1 score.

    :param golds: Ground truth (correct) target values.
    :type golds: 1-d np.array
    :param probs: Predicted target probabilities. (Not used!)
    :type probs: k-d np.array
    :param preds: Predicted target values.
    :type preds: 1-d np.array
    :param uids: Unique ids.
    :type uids: list
    :return: Average of accuracy and f1.
    :rtype: dict
    """
    metrics = dict()
    accuracy = accuracy_scorer(golds, probs, preds, uids)
    f1 = f1_scorer(golds, probs, preds, uids)
    metrics["accuracy_f1"] = np.mean([accuracy["accuracy"], f1["f1"]])
    return metrics
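# For reference, a minimal usage sketch of accuracy_f1_scorer, reusing the
# golds/preds arrays from the test_f1 cases above. The expected value follows
# by hand: accuracy = 4/6 ≈ 0.6667 and f1 (pos_label=1) = 0.5, so their
# mean is ≈ 0.5833.
import numpy as np

golds = np.array([0, 1, 0, 1, 0, 1])
preds = np.array([0, 0, 0, 0, 0, 1])

metric_dict = accuracy_f1_scorer(golds, None, preds)
print(metric_dict)  # expected: {"accuracy_f1": ~0.5833}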
def test_f1(caplog):
    """Unit test of f1_scorer."""
    caplog.set_level(logging.INFO)

    metric_dict = f1_scorer(GOLDS, PROBS, PREDS, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(GOLDS, None, PREDS, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(GOLDS, None, PREDS, pos_label=0)
    assert isequal(metric_dict, {"f1": 0.7499999999999999})

    metric_dict = f1_scorer(PROB_GOLDS, PROBS, PREDS, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(PROB_GOLDS, None, PREDS, pos_label=1)
    assert isequal(metric_dict, {"f1": 0.5})

    metric_dict = f1_scorer(PROB_GOLDS, None, PREDS, pos_label=0)
    assert isequal(metric_dict, {"f1": 0.7499999999999999})
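# GOLDS, PROBS, PREDS, and PROB_GOLDS are module-level fixtures defined
# elsewhere in the test file. A minimal sketch consistent with the earlier
# versions of this test: GOLDS/PREDS match the golds/preds arrays above, and
# PROB_GOLDS matches gold_probs. PROBS is purely hypothetical here; any
# probability array whose row-wise argmax equals PREDS would satisfy the
# assertions.
GOLDS = np.array([0, 1, 0, 1, 0, 1])
PREDS = np.array([0, 0, 0, 0, 0, 1])
PROB_GOLDS = np.array(
    [[0.6, 0.4], [0.1, 0.9], [0.7, 0.3], [0.2, 0.8], [0.9, 0.1], [0.4, 0.6]]
)
PROBS = np.array(  # hypothetical; rows argmax to PREDS
    [[0.9, 0.1], [0.6, 0.4], [0.8, 0.2], [0.7, 0.3], [0.9, 0.1], [0.2, 0.8]]
)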
def em_f1(golds, probs, preds, uids):
    """Average of exact match and f1 score."""
    f1 = f1_scorer(golds, probs, preds, uids)
    exact = em(golds, probs, preds, uids)
    # Note: unlike the scorers above, this returns a raw float, not a dict.
    return (exact + f1["f1"]) / 2
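# em is not defined in this snippet. Since its result is added directly to
# f1["f1"], it must return a raw float rather than a metric dict; a minimal
# exact-match scorer under that assumption:
def em(golds, probs, preds, uids=None):
    """Fraction of predictions that exactly match the gold labels."""
    return float(np.mean(golds == preds))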