Python set_labels Examples, biome.text.vocabulary.set_labels Python Examples

Example #1

0

Show file

    def __init__(
        self, backbone: ModelBackbone, labels: List[str], multilabel: bool = False
    ):
        """Set up a classification head on top of ``backbone``.

        Registers ``labels`` in the backbone vocabulary, picks the output
        function for the single-/multi-label mode and creates the metrics
        and the loss that belong to that mode.

        Parameters
        ----------
        backbone
            The model backbone, forwarded to the parent constructor.
        labels
            The labels to register in the backbone vocabulary.
        multilabel
            If True, configure the head for multi-label classification.
        """
        super(ClassificationHead, self).__init__(backbone)
        vocabulary.set_labels(self.backbone.vocab, labels)

        self._multilabel = multilabel

        # Multi-label mode: a single macro F1 metric and a BCE-with-logits loss.
        if self._multilabel:
            self.calculate_output = self.multi_label_output
            self.metrics = {"macro": MultiLabelF1Measure()}
            self._loss = torch.nn.BCEWithLogitsLoss()
            return

        # Single-label mode: accuracy plus micro/macro/per-label F-beta measures
        # and a cross-entropy loss.
        self.calculate_output = self.single_label_output
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "micro": FBetaMeasure(average="micro"),
            "macro": FBetaMeasure(average="macro"),
            # one entry per label index, in the order of `labels`
            "per_label": FBetaMeasure(labels=list(range(len(labels)))),
        }
        self._loss = torch.nn.CrossEntropyLoss()

Example #2

0

Show file

    def __init__(
        self,
        backbone: ModelBackbone,
        labels: List[str],
        multilabel: bool = False,
        label_weights: Optional[Union[List[float], Dict[str, float]]] = None,
    ):
        """Set up a classification head with optional per-label loss weights.

        Parameters
        ----------
        backbone
            The model backbone, forwarded to the parent constructor.
        labels
            The labels to register in the backbone vocabulary.
        multilabel
            If True, use a BCE-with-logits loss and multi-label F-beta metrics;
            otherwise a cross-entropy loss, accuracy and F-beta metrics.
        label_weights
            Optional weights handed to the loss as its ``weight`` argument.
            A list is used as given; a dict is looked up once per entry of
            ``labels`` (a missing label raises ``KeyError``).
        """
        super().__init__(backbone)
        vocabulary.set_labels(self.backbone.vocab, labels)

        self._multilabel = multilabel

        # Bring the weights into a float32 tensor; a dict is first flattened
        # into a list that follows the order of `labels`.
        if isinstance(label_weights, dict):
            label_weights = [label_weights[label] for label in labels]
        if isinstance(label_weights, list):
            label_weights = torch.tensor(label_weights, dtype=torch.float32)

        # The two modes differ in the loss, the F-beta metric type and whether
        # a plain accuracy metric is tracked.
        if self._multilabel:
            self._loss = torch.nn.BCEWithLogitsLoss(weight=label_weights)
            fbeta_type = "fbeta_multi_label"
            metric_configs = {}
        else:
            self._loss = torch.nn.CrossEntropyLoss(weight=label_weights)
            fbeta_type = "fbeta"
            metric_configs = {"accuracy": {"type": "categorical_accuracy"}}

        metric_configs["micro"] = {"type": fbeta_type, "average": "micro"}
        metric_configs["macro"] = {"type": fbeta_type, "average": "macro"}
        metric_configs["per_label"] = {
            "type": fbeta_type,
            "labels": list(range(len(labels))),
        }
        self._metrics = Metrics(**metric_configs)

Example #3

0

Show file

File: token_classification.py Project: arunadevikaruppasamy/biome-text

    def __init__(
        self,
        backbone: ModelBackbone,
        labels: List[str],
        label_encoding: Optional[str] = "BIOUL",
        top_k: int = 1,
        dropout: Optional[float] = 0.0,
        feedforward: Optional[FeedForwardConfiguration] = None,
    ) -> None:
        """Set up a token classification head with a CRF on top of ``backbone``.

        Parameters
        ----------
        backbone
            The model backbone, forwarded to the parent constructor.
        labels
            The labels to register in the backbone vocabulary.
        label_encoding
            Encoding handed to ``allowed_transitions`` and to the span F1 metric.
        top_k
            Stored on the instance; when truthy an additional
            ``accuracy_{top_k}`` metric is registered.
        dropout
            Probability handed to ``torch.nn.Dropout``.
        feedforward
            Optional feedforward configuration. When given, it is compiled with
            the encoder output dimension as its input dimension.
        """
        super(TokenClassification, self).__init__(backbone)
        vocabulary.set_labels(self.backbone.vocab, labels)

        self.top_k = top_k
        self.dropout = torch.nn.Dropout(dropout)

        # Optional feedforward layer between the encoder and the classifier
        compiled_feedforward = None
        if feedforward:
            compiled_feedforward = feedforward.input_dim(
                backbone.encoder.get_output_dim()
            ).compile()
        self._feedforward: FeedForward = compiled_feedforward

        # The classifier consumes the feedforward output if there is one,
        # otherwise the encoder output.
        if self._feedforward:
            self._classifier_input_dim = self._feedforward.get_output_dim()
        else:
            self._classifier_input_dim = backbone.encoder.get_output_dim()

        # The linear projection is applied to every token of the sequence
        projection = torch.nn.Linear(self._classifier_input_dim, self.num_labels)
        self._label_projection_layer = TimeDistributed(projection)

        # Restrict the CRF to the transitions valid for the label encoding
        index_to_label = vocabulary.get_index_to_labels_dictionary(
            self.backbone.vocab
        )
        constraints = allowed_transitions(label_encoding, index_to_label)
        self._crf = ConditionalRandomField(
            self.num_labels, constraints, include_start_end_transitions=True
        )

        self.metrics = {"accuracy": CategoricalAccuracy()}
        # NOTE(review): this is also true for top_k == 1, so an "accuracy_1"
        # metric is registered next to "accuracy" - confirm this is intended.
        if self.top_k:
            self.metrics[f"accuracy_{self.top_k}"] = CategoricalAccuracy(
                top_k=self.top_k
            )
        self.f1_metric = SpanBasedF1Measure(
            self.backbone.vocab,
            tag_namespace=vocabulary.LABELS_NAMESPACE,
            label_encoding=label_encoding,
        )

        # Span F1 first, then the accuracy metrics in insertion order
        self.__all_metrics = [self.f1_metric, *self.metrics.values()]

Example #4

0

Show file

    def __init__(
        self,
        backbone: ModelBackbone,
        labels: List[str],
        label_encoding: Optional[str] = "BIOUL",
        top_k: int = 1,
        dropout: Optional[float] = 0.0,
        feedforward: Optional[FeedForwardConfiguration] = None,
    ) -> None:
        """Set up a token classification head with a CRF on top of ``backbone``.

        Parameters
        ----------
        backbone
            The model backbone, forwarded to the parent constructor.
        labels
            The span labels; they are kept as given and passed through
            ``span_labels_to_tag_labels`` before being registered in the
            backbone vocabulary.
        label_encoding
            Tagging scheme, either "BIOUL" or "BIO".
        top_k
            Stored on the instance; when greater than 1 an additional
            ``accuracy_{top_k}`` metric is registered.
        dropout
            Probability handed to ``torch.nn.Dropout``.
        feedforward
            Optional feedforward configuration. When given, it is compiled with
            the encoder output dimension as its input dimension.

        Raises
        ------
        WrongValueError
            If ``label_encoding`` is neither "BIOUL" nor "BIO".
        """
        super(TokenClassification, self).__init__(backbone)

        if label_encoding not in ("BIOUL", "BIO"):
            raise WrongValueError(
                f"Label encoding {label_encoding} not supported. Allowed values are {['BIOUL', 'BIO']}"
            )

        self._label_encoding = label_encoding
        self._span_labels = labels

        # The vocabulary holds tag labels, so the span labels go through the
        # conversion helper for the chosen encoding first.
        tag_labels = span_labels_to_tag_labels(labels, self._label_encoding)
        vocabulary.set_labels(self.backbone.vocab, tag_labels)

        self.top_k = top_k
        self.dropout = torch.nn.Dropout(dropout)

        # Optional feedforward layer between the encoder and the classifier
        compiled_feedforward = None
        if feedforward:
            compiled_feedforward = feedforward.input_dim(
                backbone.encoder.get_output_dim()
            ).compile()
        self._feedforward: FeedForward = compiled_feedforward

        # The classifier consumes the feedforward output if there is one,
        # otherwise the encoder output.
        if self._feedforward:
            self._classifier_input_dim = self._feedforward.get_output_dim()
        else:
            self._classifier_input_dim = backbone.encoder.get_output_dim()

        # The linear projection is applied to every token of the sequence
        projection = torch.nn.Linear(self._classifier_input_dim, self.num_labels)
        self._label_projection_layer = TimeDistributed(projection)

        # Restrict the CRF to the transitions valid for the label encoding
        index_to_label = vocabulary.get_index_to_labels_dictionary(
            self.backbone.vocab
        )
        constraints = allowed_transitions(self._label_encoding, index_to_label)
        self._crf = ConditionalRandomField(
            self.num_labels, constraints, include_start_end_transitions=True
        )

        self.metrics = {"accuracy": CategoricalAccuracy()}
        if self.top_k > 1:
            self.metrics[f"accuracy_{self.top_k}"] = CategoricalAccuracy(
                top_k=self.top_k
            )
        self.f1_metric = SpanBasedF1Measure(
            self.backbone.vocab,
            tag_namespace=vocabulary.LABELS_NAMESPACE,
            label_encoding=self._label_encoding,
        )

        # Span F1 first, then the accuracy metrics in insertion order
        self.__all_metrics = [self.f1_metric, *self.metrics.values()]

Example #5

0

Show file

File: token_classification.py Project: radovankavicky/biome-text

    def __init__(
        self,
        backbone: ModelBackbone,
        labels: List[str],
        label_encoding: Optional[str] = "BIOUL",
        top_k: int = 1,
        dropout: Optional[float] = 0.0,
        feedforward: Optional[FeedForwardConfiguration] = None,
    ) -> None:
        """Set up a token classification head with a CRF on top of ``backbone``.

        Parameters
        ----------
        backbone
            The model backbone, forwarded to the parent constructor.
        labels
            Span labels or tag labels. If neither "O" nor "o" is among them,
            they are treated as span labels and converted with
            ``span_labels_to_tag_labels``; otherwise they are used as given.
        label_encoding
            Encoding handed to the label conversion, ``allowed_transitions``
            and the span F1 metric.
        top_k
            Stored on the instance; when truthy an additional
            ``accuracy_{top_k}`` metric is registered.
        dropout
            Probability handed to ``torch.nn.Dropout``.
        feedforward
            Optional feedforward configuration. When given, it is compiled with
            the encoder output dimension as its input dimension.
        """
        super(TokenClassification, self).__init__(backbone)

        self._label_encoding = label_encoding

        # "O" is a necessary tag for the BIO/BIOUL schemes and an unlikely name
        # for a span, so its presence decides how the labels are interpreted.
        has_outside_tag = "O" in labels or "o" in labels
        if not has_outside_tag:
            labels = span_labels_to_tag_labels(labels, self._label_encoding)
        # Warn if there is an "O" tag but nothing that looks like a B-/I- tag.
        elif not any(label.lower().startswith(("b-", "i-")) for label in labels):
            # The trailing space keeps the two sentences apart: adjacent string
            # literals are concatenated without any separator.
            self.__LOGGER.warning(
                "We interpreted the 'O' label as tag label, but did not find a 'B' or 'I' tag. "
                "Make sure your tag labels comply with the BIO/BIOUL tagging scheme."
            )

        vocabulary.set_labels(self.backbone.vocab, labels)

        self.top_k = top_k
        self.dropout = torch.nn.Dropout(dropout)
        self._feedforward: FeedForward = (
            None if not feedforward else feedforward.input_dim(
                backbone.encoder.get_output_dim()).compile())
        # The classifier consumes the feedforward output if there is one,
        # otherwise the encoder output.
        self._classifier_input_dim = (self._feedforward.get_output_dim()
                                      if self._feedforward else
                                      backbone.encoder.get_output_dim())
        # we want this linear applied to each token in the sequence
        self._label_projection_layer = TimeDistributed(
            torch.nn.Linear(self._classifier_input_dim, self.num_labels))
        # Restrict the CRF to the transitions valid for the label encoding
        constraints = allowed_transitions(
            self._label_encoding,
            vocabulary.get_index_to_labels_dictionary(self.backbone.vocab),
        )
        self._crf = ConditionalRandomField(self.num_labels,
                                           constraints,
                                           include_start_end_transitions=True)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        # NOTE(review): this is also true for top_k == 1, so an "accuracy_1"
        # metric is registered next to "accuracy" - confirm this is intended.
        if self.top_k:
            self.metrics.update({
                f"accuracy_{self.top_k}":
                CategoricalAccuracy(top_k=self.top_k)
            })
        self.f1_metric = SpanBasedF1Measure(
            self.backbone.vocab,
            tag_namespace=vocabulary.LABELS_NAMESPACE,
            label_encoding=self._label_encoding,
        )

        # Span F1 first, then the accuracy metrics in insertion order
        self.__all_metrics = [self.f1_metric]
        self.__all_metrics.extend(self.metrics.values())

Example #6

0

Show file

File: token_classification.py Project: recognai/biome-text

    def __init__(
        self,
        backbone: ModelBackbone,
        labels: List[str],
        label_encoding: Optional[str] = "BIOUL",
        top_k: int = 1,
        dropout: Optional[float] = 0.0,
        feedforward: Optional[FeedForwardConfiguration] = None,
    ) -> None:
        """Set up a token classification head with a CRF on top of ``backbone``.

        Parameters
        ----------
        backbone
            The model backbone, forwarded to the parent constructor.
        labels
            The span labels; they are kept as given and passed through
            ``span_labels_to_tag_labels`` before being registered in the
            backbone vocabulary.
        label_encoding
            Tagging scheme, either "BIOUL" or "BIO".
        top_k
            Stored on the instance. The metrics created here do not use it.
        dropout
            Probability handed to ``torch.nn.Dropout``.
        feedforward
            Optional feedforward configuration. When given, it is compiled with
            the encoder output dimension as its input dimension.

        Raises
        ------
        WrongValueError
            If ``label_encoding`` is neither "BIOUL" nor "BIO".
        """
        super().__init__(backbone)

        # Prediction object with no tags, entities or scores
        self._empty_prediction = TokenClassificationPrediction(
            tags=[[]], entities=[[]], scores=[]
        )

        if label_encoding not in ("BIOUL", "BIO"):
            raise WrongValueError(
                f"Label encoding {label_encoding} not supported. Allowed values are {['BIOUL', 'BIO']}"
            )

        self._label_encoding = label_encoding
        self._span_labels = labels

        # The vocabulary holds tag labels, so the span labels go through the
        # conversion helper for the chosen encoding first.
        tag_labels = span_labels_to_tag_labels(labels, self._label_encoding)
        vocabulary.set_labels(self.backbone.vocab, tag_labels)

        self.top_k = top_k
        self.dropout = torch.nn.Dropout(dropout)

        # Optional feedforward layer between the encoder and the classifier
        compiled_feedforward = None
        if feedforward:
            compiled_feedforward = feedforward.input_dim(
                backbone.encoder.get_output_dim()
            ).compile()
        self._feedforward: FeedForward = compiled_feedforward

        # The classifier consumes the feedforward output if there is one,
        # otherwise the encoder output.
        if self._feedforward:
            self._classifier_input_dim = self._feedforward.get_output_dim()
        else:
            self._classifier_input_dim = backbone.encoder.get_output_dim()

        # The linear projection is applied to every token of the sequence
        projection = torch.nn.Linear(self._classifier_input_dim, self.num_labels)
        self._label_projection_layer = TimeDistributed(projection)

        # Restrict the CRF to the transitions valid for the label encoding
        index_to_label = vocabulary.get_index_to_labels_dictionary(
            self.backbone.vocab
        )
        constraints = allowed_transitions(self._label_encoding, index_to_label)
        self._crf = ConditionalRandomField(
            self.num_labels, constraints, include_start_end_transitions=True
        )

        # There is no top_k option for the f1 metric, it only ever looks at the first choice.
        # Using top_k in the accuracy would require changing how the CRF output is converted to logits!
        span_f1_config = {
            "type": "span_f1",
            "vocabulary": self.backbone.vocab,
            "tag_namespace": vocabulary.LABELS_NAMESPACE,
            "label_encoding": self._label_encoding,
        }
        self._metrics = Metrics(
            accuracy={"type": "categorical_accuracy"},
            f1=span_f1_config,
        )