Ejemplo n.º 1
0
    def get_gold_labels(self,
                        cand_lists: List[List[Candidate]],
                        annotator: Optional[str] = None) -> List[np.ndarray]:
        """Return one dense GoldLabel matrix per candidate list.

        The sparse matrices obtained from ``get_sparse_matrix`` for
        ``GoldLabelKey`` are each converted with ``unshift_label_matrix`` and
        then checked for ABSTAIN entries before being returned.

        :param cand_lists: The candidates to get gold labels for.
        :type cand_lists: List of list of candidates.
        :param annotator: A specific annotator key to get labels for; it is
            forwarded to ``get_sparse_matrix`` as ``key``. Default None.
        :type annotator: str
        :raises ValueError: If any converted matrix contains ABSTAIN. This
            typically means gold labels were requested before they were
            loaded, so the error is raised to flag that likely mistake.
        :return: A list of MxN dense matrices where M is the number of
            candidates and N is the number of annotators. If annotator is
            provided, a list of Mx1 matrices.
        :rtype: list[np.ndarray]
        """
        sparse_matrices = get_sparse_matrix(
            self.session, GoldLabelKey, cand_lists, key=annotator)
        dense_matrices = [
            unshift_label_matrix(sparse) for sparse in sparse_matrices
        ]

        # All matrices are converted first; validation only starts afterwards
        # and stops at the first matrix that holds an ABSTAIN value.
        if any(ABSTAIN in dense for dense in dense_matrices):
            raise ValueError("Gold labels contain ABSTAIN labels. "
                             "Did you load gold labels beforehand?")

        return dense_matrices
Ejemplo n.º 2
0
    def convert_labels_to_matrix(labels: List[Dict[str, Any]],
                                 keys: List[str]) -> np.ndarray:
        """Build a dense label matrix from LabelerUDF.apply output.

        The mappings are first assembled into a matrix by
        ``_convert_mappings_to_matrix`` and the result is then passed through
        ``unshift_label_matrix``.

        Note that the input labels are 0-indexed (``{0, 1, ..., k}``),
        while the output labels are -1-indexed (``{-1, 0, ..., k-1}``).

        :param labels: a list of label mappings (key: key, value: label).
        :param keys: a list of all keys.
        :return: the dense, -1-indexed label matrix.
        """
        shifted_matrix = _convert_mappings_to_matrix(labels, keys)
        dense_matrix = unshift_label_matrix(shifted_matrix)
        return dense_matrix
Ejemplo n.º 3
0
    def get_label_matrices(self, cand_lists: List[List[Candidate]]) -> List[np.ndarray]:
        """Return one dense Label matrix per candidate list.

        Each sparse matrix obtained from ``get_sparse_matrix`` for ``LabelKey``
        is converted with ``unshift_label_matrix``.

        :param cand_lists: The candidates to get labels for.
        :return: A list of MxN dense matrices where M is the number of
            candidates and N is the number of labeling functions.
        """
        sparse_matrices = get_sparse_matrix(self.session, LabelKey, cand_lists)
        return [unshift_label_matrix(sparse) for sparse in sparse_matrices]
Ejemplo n.º 4
0
def _L_matrix(labels: List[Dict[str, Any]],
              key_names: List[str]) -> np.ndarray:
    """Build a dense label matrix from LabelerUDF.apply output.

    :func:`LabelerUDF.apply` returns a list of lists of label mappings, where
    the outer list represents candidate_classes. This function takes the
    inner list, i.e. the label mappings of a single candidate_class.

    The mappings are assembled by ``_F_matrix`` and the result is passed
    through ``unshift_label_matrix``. Input labels are 0-indexed
    (``{0, 1, ..., k}``), while output labels are -1-indexed
    (``{-1, 0, ..., k-1}``).

    :param labels: a list of label mappings (key: key_name, value: label).
    :param key_names: a list of all key_names.
    :return: the dense, -1-indexed label matrix.
    """
    shifted_matrix = _F_matrix(labels, key_names)
    dense_matrix = unshift_label_matrix(shifted_matrix)
    return dense_matrix
Ejemplo n.º 5
0
def test_shift_label_matrix(caplog):
    """Check that shifting and then unshifting a label matrix round-trips."""
    caplog.set_level(logging.INFO)

    # Dense label matrix (ABSTAIN as -1) with values:
    #   -1  0
    #    1 -1
    dense_labels = np.array([[-1, 0], [1, -1]])

    # Expected sparse label matrix (ABSTAIN as 0):
    #    0  1
    #    2  0
    sparse_labels = shift_label_matrix(dense_labels)

    # Unshifting the shifted matrix must give back the dense original.
    round_tripped = unshift_label_matrix(sparse_labels)
    assert np.array_equal(dense_labels, round_tripped)

    # Only the two non-ABSTAIN entries should be stored as nonzero.
    assert sparse_labels.count_nonzero() == 2
Ejemplo n.º 6
0
    def get_gold_labels(self,
                        cand_lists: List[List[Candidate]],
                        annotator: Optional[str] = None) -> List[np.ndarray]:
        """Return one dense GoldLabel matrix per candidate list.

        Each sparse matrix obtained from ``get_sparse_matrix`` for
        ``GoldLabelKey`` is converted with ``unshift_label_matrix``. No
        validation is performed on the converted matrices.

        :param cand_lists: The candidates to get gold labels for.
        :type cand_lists: List of list of candidates.
        :param annotator: A specific annotator key to get labels for; it is
            forwarded to ``get_sparse_matrix`` as ``key``. Default None.
        :type annotator: str
        :return: A list of MxN dense matrices where M is the number of
            candidates and N is the number of annotators. If annotator is
            provided, a list of Mx1 matrices.
        :rtype: list[np.ndarray]
        """
        dense_matrices = []
        for sparse in get_sparse_matrix(
                self.session, GoldLabelKey, cand_lists, key=annotator):
            dense_matrices.append(unshift_label_matrix(sparse))
        return dense_matrices