def get_gold_labels(
    self,
    cand_lists: List[List[Candidate]],
    annotator: Optional[str] = None,
) -> List[np.ndarray]:
    """Fetch the GoldLabels of every candidate_class as dense matrices.

    :param cand_lists: The candidates whose gold labels are requested.
    :type cand_lists: List of list of candidates.
    :param annotator: Restrict the result to this annotator key.
        Default None.
    :type annotator: str
    :raises ValueError: If any of the resulting matrices contains an
        ABSTAIN value, which typically means gold labels have not been
        loaded yet. The error points the user at that likely mistake.
    :return: One MxN dense matrix per candidate list, where M are the
        candidates and N is the annotators. If annotator is provided, each
        matrix is Mx1.
    :rtype: list[np.ndarray]
    """
    sparse_matrices = get_sparse_matrix(
        self.session, GoldLabelKey, cand_lists, key=annotator
    )

    # Convert every sparse matrix first; validation happens afterwards.
    dense_matrices = []
    for sparse in sparse_matrices:
        dense_matrices.append(unshift_label_matrix(sparse))

    # An ABSTAIN entry means a candidate has no gold label stored.
    if any(ABSTAIN in dense for dense in dense_matrices):
        raise ValueError(
            "Gold labels contain ABSTAIN labels. "
            "Did you load gold labels beforehand?"
        )
    return dense_matrices
def convert_labels_to_matrix(
    labels: List[Dict[str, Any]], keys: List[str]
) -> np.ndarray:
    """Build a dense label matrix from the output of LabelerUDF.apply.

    The incoming labels are 0-indexed (``{0, 1, ..., k}``), whereas the
    returned matrix is -1-indexed (``{-1, 0, ..., k-1}``).

    :param labels: a list of label mapping (key: key, value=label).
    :param keys: a list of all keys.
    :return: the unshifted dense label matrix.
    """
    # First lay the mappings out as a matrix, then shift it to -1-indexing.
    shifted_matrix = _convert_mappings_to_matrix(labels, keys)
    return unshift_label_matrix(shifted_matrix)
def get_label_matrices(
    self, cand_lists: List[List[Candidate]]
) -> List[np.ndarray]:
    """Fetch the Labels of every candidate_class as dense matrices.

    :param cand_lists: The candidates whose labels are requested.
    :return: One MxN dense matrix per candidate list, where M are the
        candidates and N is the labeling functions.
    """
    dense_matrices = []
    for sparse in get_sparse_matrix(self.session, LabelKey, cand_lists):
        # Each sparse matrix is unshifted into its dense counterpart.
        dense_matrices.append(unshift_label_matrix(sparse))
    return dense_matrices
def _L_matrix(
    labels: List[Dict[str, Any]], key_names: List[str]
) -> np.ndarray:
    """Build a dense label matrix from the output of LabelerUDF.apply.

    :func:`LabelerUDF.apply` returns a list of list of label mapping whose
    outer list represents candidate_classes; this function expects the
    list of label mapping belonging to a single candidate_class.

    The incoming labels are 0-indexed (``{0, 1, ..., k}``), whereas the
    returned matrix is -1-indexed (``{-1, 0, ..., k-1}``).

    :param labels: a list of label mapping (key: key_name, value=label).
    :param key_names: a list of all key_names.
    :return: the unshifted dense label matrix.
    """
    # Lay the mappings out as a matrix, then shift it to -1-indexing.
    shifted_matrix = _F_matrix(labels, key_names)
    return unshift_label_matrix(shifted_matrix)
def test_shift_label_matrix(caplog):
    """Check that shifting then unshifting a label matrix round-trips."""
    caplog.set_level(logging.INFO)

    # Dense label matrix (ABSTAIN as -1) with values:
    #   -1  0
    #    1 -1
    dense = np.array([[-1, 0], [1, -1]])

    # Its sparse counterpart (ABSTAIN as 0) is expected to be:
    #    0  1
    #    2  0
    sparse = shift_label_matrix(dense)

    # Unshifting must give back the original dense matrix.
    round_tripped = unshift_label_matrix(sparse)
    assert np.array_equal(dense, round_tripped)

    # Only the two non-ABSTAIN entries are nonzero in the sparse form.
    assert sparse.count_nonzero() == 2
def get_gold_labels(
    self,
    cand_lists: List[List[Candidate]],
    annotator: Optional[str] = None,
) -> List[np.ndarray]:
    """Fetch the GoldLabels of every candidate_class as dense matrices.

    :param cand_lists: The candidates whose gold labels are requested.
    :type cand_lists: List of list of candidates.
    :param annotator: Restrict the result to this annotator key.
        Default None.
    :type annotator: str
    :return: One MxN dense matrix per candidate list, where M are the
        candidates and N is the annotators. If annotator is provided, each
        matrix is Mx1.
    :rtype: list[np.ndarray]
    """
    sparse_matrices = get_sparse_matrix(
        self.session, GoldLabelKey, cand_lists, key=annotator
    )
    dense_matrices = []
    for sparse in sparse_matrices:
        # Each sparse matrix is unshifted into its dense counterpart.
        dense_matrices.append(unshift_label_matrix(sparse))
    return dense_matrices