def __init__(
    self, backbone: ModelBackbone, labels: List[str], multilabel: bool = False
):
    """Build a classification head on top of ``backbone``.

    Registers ``labels`` in the backbone vocabulary and picks the output
    function, the metrics and the loss depending on ``multilabel``.

    Parameters
    ----------
    backbone
        The model backbone this head is attached to.
    labels
        The label names handled by this head.
    multilabel
        If True, use ``multi_label_output``, a ``MultiLabelF1Measure`` metric
        and ``BCEWithLogitsLoss``. Otherwise use ``single_label_output``,
        accuracy plus micro/macro/per-label F-beta metrics and
        ``CrossEntropyLoss``.
    """
    super(ClassificationHead, self).__init__(backbone)
    vocabulary.set_labels(self.backbone.vocab, labels)

    # label related configurations
    self._multilabel = multilabel

    # multi-label setup: output function, metrics and loss
    if self._multilabel:
        self.calculate_output = self.multi_label_output
        self.metrics = {"macro": MultiLabelF1Measure()}
        self._loss = torch.nn.BCEWithLogitsLoss()
        return

    # single-label setup: output function, metrics and loss
    self.calculate_output = self.single_label_output
    label_indices = list(range(len(labels)))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "micro": FBetaMeasure(average="micro"),
        "macro": FBetaMeasure(average="macro"),
        "per_label": FBetaMeasure(labels=label_indices),
    }
    self._loss = torch.nn.CrossEntropyLoss()
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    multilabel: bool = False,
    label_weights: Optional[Union[List[float], Dict[str, float]]] = None,
):
    """Build a classification head with optional per-label loss weights.

    Parameters
    ----------
    backbone
        The model backbone this head is attached to.
    labels
        The label names handled by this head; they are registered in the
        backbone vocabulary.
    multilabel
        If True, use ``BCEWithLogitsLoss`` and multi-label F-beta metrics,
        otherwise ``CrossEntropyLoss``, categorical accuracy and F-beta metrics.
    label_weights
        Weights forwarded to the loss as its ``weight`` argument.
        A list is converted to a float32 tensor as is (presumably ordered
        like ``labels`` -- confirm with callers). A dict is looked up once
        per entry of ``labels``, so every label needs a key.
        Any other value (e.g. None) is passed through unchanged.
    """
    super().__init__(backbone)
    vocabulary.set_labels(self.backbone.vocab, labels)

    # label related configurations
    self._multilabel = multilabel

    # normalize the weights: dict -> list ordered like `labels`, list -> float32 tensor
    if isinstance(label_weights, dict):
        label_weights = [label_weights[name] for name in labels]
    if isinstance(label_weights, list):
        label_weights = torch.tensor(label_weights, dtype=torch.float32)

    # metrics and loss: only the loss class, the fbeta flavour and the
    # accuracy entry differ between the two modes
    if self._multilabel:
        loss_class = torch.nn.BCEWithLogitsLoss
        fbeta_type = "fbeta_multi_label"
        metric_configs = {}
    else:
        loss_class = torch.nn.CrossEntropyLoss
        fbeta_type = "fbeta"
        metric_configs = {"accuracy": {"type": "categorical_accuracy"}}

    metric_configs["micro"] = {"type": fbeta_type, "average": "micro"}
    metric_configs["macro"] = {"type": fbeta_type, "average": "macro"}
    metric_configs["per_label"] = {
        "type": fbeta_type,
        "labels": list(range(len(labels))),
    }

    self._loss = loss_class(weight=label_weights)
    self._metrics = Metrics(**metric_configs)
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    label_encoding: Optional[str] = "BIOUL",
    top_k: int = 1,
    dropout: Optional[float] = 0.0,
    feedforward: Optional[FeedForwardConfiguration] = None,
) -> None:
    """Build a token classification head with a CRF output layer.

    Parameters
    ----------
    backbone
        The model backbone this head is attached to.
    labels
        The tag labels; they are registered in the backbone vocabulary.
    label_encoding
        Encoding scheme passed to ``allowed_transitions`` and to the
        span-based F1 metric.
    top_k
        Stored on the head; if truthy, an extra ``accuracy_{top_k}`` metric
        is registered.
    dropout
        Dropout probability. ``None`` is treated as 0.0 (no dropout).
    feedforward
        Optional feed-forward configuration applied before the label
        projection; its input dimension is set to the encoder output dimension.
    """
    super(TokenClassification, self).__init__(backbone)
    vocabulary.set_labels(self.backbone.vocab, labels)

    self.top_k = top_k
    # torch.nn.Dropout compares its probability against 0 and 1, so passing
    # None (allowed by the signature) would raise a TypeError
    self.dropout = torch.nn.Dropout(0.0 if dropout is None else dropout)

    self._feedforward: Optional[FeedForward] = (
        feedforward.input_dim(backbone.encoder.get_output_dim()).compile()
        if feedforward
        else None
    )

    # output layers
    self._classifier_input_dim = (
        self._feedforward.get_output_dim()
        if self._feedforward
        else backbone.encoder.get_output_dim()
    )
    # we want this linear applied to each token in the sequence
    self._label_projection_layer = TimeDistributed(
        torch.nn.Linear(self._classifier_input_dim, self.num_labels)
    )

    # transition constraints for the CRF, derived from the encoding scheme
    # and the index -> label mapping of the vocabulary
    constraints = allowed_transitions(
        label_encoding,
        vocabulary.get_index_to_labels_dictionary(self.backbone.vocab),
    )
    self._crf = ConditionalRandomField(
        self.num_labels, constraints, include_start_end_transitions=True
    )

    self.metrics = {"accuracy": CategoricalAccuracy()}
    # NOTE(review): with the default top_k=1 this registers "accuracy_1",
    # which presumably duplicates "accuracy" -- kept for backward compatibility
    if self.top_k:
        self.metrics[f"accuracy_{self.top_k}"] = CategoricalAccuracy(
            top_k=self.top_k
        )
    self.f1_metric = SpanBasedF1Measure(
        self.backbone.vocab,
        tag_namespace=vocabulary.LABELS_NAMESPACE,
        label_encoding=label_encoding,
    )

    # the span F1 metric first, followed by the accuracy metrics
    self.__all_metrics = [self.f1_metric, *self.metrics.values()]
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    label_encoding: Optional[str] = "BIOUL",
    top_k: int = 1,
    dropout: Optional[float] = 0.0,
    feedforward: Optional[FeedForwardConfiguration] = None,
) -> None:
    """Build a token classification head with a CRF output layer.

    Parameters
    ----------
    backbone
        The model backbone this head is attached to.
    labels
        Span or tag labels. They are kept as ``_span_labels`` and passed
        through ``span_labels_to_tag_labels`` before being registered in
        the backbone vocabulary.
    label_encoding
        Tagging scheme, either "BIOUL" or "BIO".
    top_k
        If greater than 1, an extra ``accuracy_{top_k}`` metric is registered.
    dropout
        Dropout probability. ``None`` is treated as 0.0 (no dropout).
    feedforward
        Optional feed-forward configuration applied before the label
        projection; its input dimension is set to the encoder output dimension.

    Raises
    ------
    WrongValueError
        If ``label_encoding`` is neither "BIOUL" nor "BIO".
    """
    super(TokenClassification, self).__init__(backbone)

    if label_encoding not in ["BIOUL", "BIO"]:
        raise WrongValueError(
            f"Label encoding {label_encoding} not supported. Allowed values are {['BIOUL', 'BIO']}"
        )

    self._span_labels = labels
    self._label_encoding = label_encoding

    # Convert span labels to tag labels if necessary.
    # NOTE(review): the helper is called unconditionally here; the check for
    # an "O" tag (necessary for BIO/BIOUL schemes, unlikely as a span label)
    # is presumably done inside span_labels_to_tag_labels -- confirm.
    vocabulary.set_labels(
        self.backbone.vocab,
        span_labels_to_tag_labels(labels, self._label_encoding),
    )

    self.top_k = top_k
    # torch.nn.Dropout compares its probability against 0 and 1, so passing
    # None (allowed by the signature) would raise a TypeError
    self.dropout = torch.nn.Dropout(0.0 if dropout is None else dropout)

    self._feedforward: Optional[FeedForward] = (
        feedforward.input_dim(backbone.encoder.get_output_dim()).compile()
        if feedforward
        else None
    )

    # output layers
    self._classifier_input_dim = (
        self._feedforward.get_output_dim()
        if self._feedforward
        else backbone.encoder.get_output_dim()
    )
    # we want this linear applied to each token in the sequence
    self._label_projection_layer = TimeDistributed(
        torch.nn.Linear(self._classifier_input_dim, self.num_labels)
    )

    # transition constraints for the CRF, derived from the encoding scheme
    # and the index -> label mapping of the vocabulary
    constraints = allowed_transitions(
        self._label_encoding,
        vocabulary.get_index_to_labels_dictionary(self.backbone.vocab),
    )
    self._crf = ConditionalRandomField(
        self.num_labels, constraints, include_start_end_transitions=True
    )

    self.metrics = {"accuracy": CategoricalAccuracy()}
    if self.top_k > 1:
        self.metrics[f"accuracy_{self.top_k}"] = CategoricalAccuracy(
            top_k=self.top_k
        )
    self.f1_metric = SpanBasedF1Measure(
        self.backbone.vocab,
        tag_namespace=vocabulary.LABELS_NAMESPACE,
        label_encoding=self._label_encoding,
    )

    # the span F1 metric first, followed by the accuracy metrics
    self.__all_metrics = [self.f1_metric, *self.metrics.values()]
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    label_encoding: Optional[str] = "BIOUL",
    top_k: int = 1,
    dropout: Optional[float] = 0.0,
    feedforward: Optional[FeedForwardConfiguration] = None,
) -> None:
    """Build a token classification head with a CRF output layer.

    Parameters
    ----------
    backbone
        The model backbone this head is attached to.
    labels
        Span or tag labels. If neither "O" nor "o" is among them, they are
        treated as span labels and converted with ``span_labels_to_tag_labels``.
        Otherwise they are used as given, and a warning is logged when no
        label starts with "b-" or "i-" (case-insensitive).
    label_encoding
        Encoding scheme used for the label conversion, the CRF transition
        constraints and the span-based F1 metric.
    top_k
        Stored on the head; if truthy, an extra ``accuracy_{top_k}`` metric
        is registered.
    dropout
        Dropout probability. ``None`` is treated as 0.0 (no dropout).
    feedforward
        Optional feed-forward configuration applied before the label
        projection; its input dimension is set to the encoder output dimension.
    """
    super(TokenClassification, self).__init__(backbone)

    self._label_encoding = label_encoding

    # Convert span labels to tag labels if necessary.
    # We just check if "O" is in the label list, a necessary tag for
    # IOB/BIOUL schemes, an unlikely label for spans.
    if "O" not in labels and "o" not in labels:
        labels = span_labels_to_tag_labels(labels, self._label_encoding)
    # Issue a warning if you have the "O" tag but no other BIO/BIOUL looking tags.
    elif not any(label.lower().startswith(("b-", "i-")) for label in labels):
        self.__LOGGER.warning(
            "We interpreted the 'O' label as tag label, but did not find a 'B' or 'I' tag. "
            "Make sure your tag labels comply with the BIO/BIOUL tagging scheme."
        )

    vocabulary.set_labels(self.backbone.vocab, labels)

    self.top_k = top_k
    # torch.nn.Dropout compares its probability against 0 and 1, so passing
    # None (allowed by the signature) would raise a TypeError
    self.dropout = torch.nn.Dropout(0.0 if dropout is None else dropout)

    self._feedforward: Optional[FeedForward] = (
        feedforward.input_dim(backbone.encoder.get_output_dim()).compile()
        if feedforward
        else None
    )

    # output layers
    self._classifier_input_dim = (
        self._feedforward.get_output_dim()
        if self._feedforward
        else backbone.encoder.get_output_dim()
    )
    # we want this linear applied to each token in the sequence
    self._label_projection_layer = TimeDistributed(
        torch.nn.Linear(self._classifier_input_dim, self.num_labels)
    )

    # transition constraints for the CRF, derived from the encoding scheme
    # and the index -> label mapping of the vocabulary
    constraints = allowed_transitions(
        self._label_encoding,
        vocabulary.get_index_to_labels_dictionary(self.backbone.vocab),
    )
    self._crf = ConditionalRandomField(
        self.num_labels, constraints, include_start_end_transitions=True
    )

    self.metrics = {"accuracy": CategoricalAccuracy()}
    # NOTE(review): with the default top_k=1 this registers "accuracy_1",
    # which presumably duplicates "accuracy" -- kept for backward compatibility
    if self.top_k:
        self.metrics[f"accuracy_{self.top_k}"] = CategoricalAccuracy(
            top_k=self.top_k
        )
    self.f1_metric = SpanBasedF1Measure(
        self.backbone.vocab,
        tag_namespace=vocabulary.LABELS_NAMESPACE,
        label_encoding=self._label_encoding,
    )

    # the span F1 metric first, followed by the accuracy metrics
    self.__all_metrics = [self.f1_metric, *self.metrics.values()]
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    label_encoding: Optional[str] = "BIOUL",
    top_k: int = 1,
    dropout: Optional[float] = 0.0,
    feedforward: Optional[FeedForwardConfiguration] = None,
) -> None:
    """Build a token classification head with a CRF output layer.

    Parameters
    ----------
    backbone
        The model backbone this head is attached to.
    labels
        Span or tag labels. They are kept as ``_span_labels`` and passed
        through ``span_labels_to_tag_labels`` before being registered in
        the backbone vocabulary.
    label_encoding
        Tagging scheme, either "BIOUL" or "BIO".
    top_k
        Stored on the head; it does not affect the metrics configured here.
    dropout
        Dropout probability. ``None`` is treated as 0.0 (no dropout).
    feedforward
        Optional feed-forward configuration applied before the label
        projection; its input dimension is set to the encoder output dimension.

    Raises
    ------
    WrongValueError
        If ``label_encoding`` is neither "BIOUL" nor "BIO".
    """
    super().__init__(backbone)

    # prediction template with no tags, entities or scores
    self._empty_prediction = TokenClassificationPrediction(
        tags=[[]], entities=[[]], scores=[]
    )

    if label_encoding not in ["BIOUL", "BIO"]:
        raise WrongValueError(
            f"Label encoding {label_encoding} not supported. Allowed values are {['BIOUL', 'BIO']}"
        )

    self._span_labels = labels
    self._label_encoding = label_encoding

    # Convert span labels to tag labels if necessary.
    # NOTE(review): the helper is called unconditionally here; the check for
    # an "O" tag (necessary for BIO/BIOUL schemes, unlikely as a span label)
    # is presumably done inside span_labels_to_tag_labels -- confirm.
    vocabulary.set_labels(
        self.backbone.vocab,
        span_labels_to_tag_labels(labels, self._label_encoding),
    )

    self.top_k = top_k
    # torch.nn.Dropout compares its probability against 0 and 1, so passing
    # None (allowed by the signature) would raise a TypeError
    self.dropout = torch.nn.Dropout(0.0 if dropout is None else dropout)

    self._feedforward: Optional[FeedForward] = (
        feedforward.input_dim(backbone.encoder.get_output_dim()).compile()
        if feedforward
        else None
    )

    # output layers
    self._classifier_input_dim = (
        self._feedforward.get_output_dim()
        if self._feedforward
        else backbone.encoder.get_output_dim()
    )
    # we want this linear applied to each token in the sequence
    self._label_projection_layer = TimeDistributed(
        torch.nn.Linear(self._classifier_input_dim, self.num_labels)
    )

    # transition constraints for the CRF, derived from the encoding scheme
    # and the index -> label mapping of the vocabulary
    constraints = allowed_transitions(
        self._label_encoding,
        vocabulary.get_index_to_labels_dictionary(self.backbone.vocab),
    )
    self._crf = ConditionalRandomField(
        self.num_labels, constraints, include_start_end_transitions=True
    )

    # There is no top_k option for the f1 metric, it will always only take into account the first choice
    # If you want to use top_k in the accuracy, you have to change the way we convert the CRF output to logits!
    self._metrics = Metrics(
        accuracy={"type": "categorical_accuracy"},
        f1={
            "type": "span_f1",
            "vocabulary": self.backbone.vocab,
            "tag_namespace": vocabulary.LABELS_NAMESPACE,
            "label_encoding": self._label_encoding,
        },
    )