def test_fbeta_multiclass_with_mask(self):
    mask = torch.Tensor([1, 1, 1, 1, 1, 0])

    fbeta = FBetaMeasure()
    fbeta(self.predictions, self.targets, mask)
    metric = fbeta.get_metric()
    precisions = metric['precision']
    recalls = metric['recall']
    fscores = metric['fscore']

    numpy.testing.assert_almost_equal(fbeta._pred_sum.tolist(), [1, 3, 0, 1, 0])
    numpy.testing.assert_almost_equal(fbeta._true_sum.tolist(), [2, 1, 0, 1, 1])
    numpy.testing.assert_almost_equal(fbeta._true_positive_sum.tolist(), [1, 1, 0, 1, 0])

    desired_precisions = [1.00, 0.33, 0.00, 1.00, 0.00]
    desired_recalls = [0.50, 1.00, 0.00, 1.00, 0.00]
    desired_fscores = [(2 * p * r) / (p + r) if p + r != 0.0 else 0.0
                       for p, r in zip(desired_precisions, desired_recalls)]
    numpy.testing.assert_almost_equal(precisions, desired_precisions, decimal=2)
    numpy.testing.assert_almost_equal(recalls, desired_recalls, decimal=2)
    numpy.testing.assert_almost_equal(fscores, desired_fscores, decimal=2)
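# The tests in this suite read fixtures (self.predictions, self.targets,
# self.desired_*) from a setup method that is not shown here. The sketch
# below is a fixture consistent with every count assertion in these tests
# (_pred_sum, _true_sum, _true_positive_sum, the micro-average TP/FP/FN
# lists); the exact probability values and the method name are an
# assumption reconstructed from those assertions, not the original setUp.
def setup_method(self):
    self.predictions = torch.tensor(
        [
            [0.35, 0.25, 0.1, 0.1, 0.2],  # argmax -> class 0
            [0.1, 0.6, 0.1, 0.2, 0.0],    # argmax -> class 1
            [0.1, 0.6, 0.1, 0.2, 0.0],    # argmax -> class 1
            [0.1, 0.5, 0.1, 0.2, 0.0],    # argmax -> class 1
            [0.1, 0.2, 0.1, 0.7, 0.0],    # argmax -> class 3
            [0.1, 0.6, 0.1, 0.2, 0.0],    # argmax -> class 1 (masked out above)
        ]
    )
    self.targets = torch.tensor([0, 4, 1, 0, 3, 0])

    # Per-class values implied by the fixture above.
    self.desired_precisions = [1.00, 0.25, 0.00, 1.00, 0.00]
    self.desired_recalls = [1 / 3, 1.00, 0.00, 1.00, 0.00]
    self.desired_fscores = [
        (2 * p * r) / (p + r) if p + r != 0.0 else 0.0
        for p, r in zip(self.desired_precisions, self.desired_recalls)
    ]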
def test_fbeta_multiclass_with_micro_average(self):
    labels = [1, 3]
    fbeta = FBetaMeasure(average="micro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    true_positives = [1, 1]
    false_positives = [3, 0]
    false_negatives = [0, 0]
    mean_true_positive = numpy.mean(true_positives)
    mean_false_positive = numpy.mean(false_positives)
    mean_false_negative = numpy.mean(false_negatives)

    micro_precision = mean_true_positive / (mean_true_positive + mean_false_positive)
    micro_recall = mean_true_positive / (mean_true_positive + mean_false_negative)
    micro_fscore = (2 * micro_precision * micro_recall) / (micro_precision + micro_recall)

    # check value
    numpy.testing.assert_almost_equal(precisions, micro_precision, decimal=2)
    numpy.testing.assert_almost_equal(recalls, micro_recall, decimal=2)
    numpy.testing.assert_almost_equal(fscores, micro_fscore, decimal=2)
def test_fbeta_multiclass_with_mask(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)
    mask = torch.tensor([True, True, True, True, True, False], device=device)

    fbeta = FBetaMeasure()
    fbeta(self.predictions, self.targets, mask)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    assert_allclose(fbeta._pred_sum.tolist(), [1, 3, 0, 1, 0])
    assert_allclose(fbeta._true_sum.tolist(), [2, 1, 0, 1, 1])
    assert_allclose(fbeta._true_positive_sum.tolist(), [1, 1, 0, 1, 0])

    desired_precisions = [1.00, 1 / 3, 0.00, 1.00, 0.00]
    desired_recalls = [0.50, 1.00, 0.00, 1.00, 0.00]
    desired_fscores = [
        (2 * p * r) / (p + r) if p + r != 0.0 else 0.0
        for p, r in zip(desired_precisions, desired_recalls)
    ]
    assert_allclose(precisions, desired_precisions)
    assert_allclose(recalls, desired_recalls)
    assert_allclose(fscores, desired_fscores)
def test_fbeta_multiclass_micro_average_metric(self):
    fbeta = FBetaMeasure(average='micro')
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric['precision']
    recalls = metric['recall']
    fscores = metric['fscore']

    true_positives = [1, 1, 0, 1, 0]
    false_positives = [0, 3, 0, 0, 0]
    false_negatives = [2, 0, 0, 0, 1]
    mean_true_positive = numpy.mean(true_positives)
    mean_false_positive = numpy.mean(false_positives)
    mean_false_negative = numpy.mean(false_negatives)

    micro_precision = mean_true_positive / (mean_true_positive + mean_false_positive)
    micro_recall = mean_true_positive / (mean_true_positive + mean_false_negative)
    micro_fscore = (2 * micro_precision * micro_recall) / (micro_precision + micro_recall)

    # check value
    numpy.testing.assert_almost_equal(precisions, micro_precision, decimal=2)
    numpy.testing.assert_almost_equal(recalls, micro_recall, decimal=2)
    numpy.testing.assert_almost_equal(fscores, micro_fscore, decimal=2)
def test_fbeta_multiclass_with_micro_average(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)
    labels = [1, 3]
    fbeta = FBetaMeasure(average="micro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    # We keep the expected values on the CPU because FBetaMeasure returns its results on the CPU.
    true_positives = torch.tensor([1, 1], dtype=torch.float32)
    false_positives = torch.tensor([3, 0], dtype=torch.float32)
    false_negatives = torch.tensor([0, 0], dtype=torch.float32)
    mean_true_positive = true_positives.mean()
    mean_false_positive = false_positives.mean()
    mean_false_negative = false_negatives.mean()

    micro_precision = mean_true_positive / (mean_true_positive + mean_false_positive)
    micro_recall = mean_true_positive / (mean_true_positive + mean_false_negative)
    micro_fscore = (2 * micro_precision * micro_recall) / (micro_precision + micro_recall)

    # check value
    assert_allclose(precisions, micro_precision)
    assert_allclose(recalls, micro_recall)
    assert_allclose(fscores, micro_fscore)
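# A hypothetical cross-check, not part of the original suite: the micro
# averages hand-computed above should agree with scikit-learn's
# precision_recall_fscore_support, which the weighted-average test
# further below already uses on the same fixtures. The test name is an
# assumption; sklearn is assumed available in the test environment.
def test_fbeta_multiclass_micro_average_against_sklearn(self):
    from sklearn.metrics import precision_recall_fscore_support

    labels = [1, 3]
    fbeta = FBetaMeasure(average="micro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()

    micro_precision, micro_recall, micro_fscore, _ = precision_recall_fscore_support(
        self.targets.cpu().numpy(),
        self.predictions.argmax(dim=1).cpu().numpy(),
        labels=labels,
        average="micro",
    )
    assert_allclose(metric["precision"], micro_precision)
    assert_allclose(metric["recall"], micro_recall)
    assert_allclose(metric["fscore"], micro_fscore)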
class VisualEntailmentHead(Head):
    def __init__(self, vocab: Vocabulary, embedding_dim: int, label_namespace: str = "labels"):
        super().__init__(vocab)

        num_labels = vocab.get_vocab_size(label_namespace)
        self.label_namespace = label_namespace
        self.classifier = torch.nn.Linear(embedding_dim, num_labels)

        from allennlp.training.metrics import CategoricalAccuracy
        from allennlp.training.metrics import FBetaMeasure

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    @overrides
    def forward(
        self,  # type: ignore
        encoded_boxes: torch.Tensor,
        encoded_boxes_mask: torch.Tensor,
        encoded_boxes_pooled: torch.Tensor,
        encoded_text: torch.Tensor,
        encoded_text_mask: torch.Tensor,
        encoded_text_pooled: torch.Tensor,
        pooled_boxes_and_text: torch.Tensor,
        labels: Optional[torch.Tensor] = None,
        label_weights: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        logits = self.classifier(pooled_boxes_and_text)
        probs = torch.softmax(logits, dim=-1)

        output = {"logits": logits, "probs": probs}

        assert label_weights is None
        if labels is not None:
            output["loss"] = torch.nn.functional.cross_entropy(logits, labels) / logits.size(0)
            self.accuracy(logits, labels)
            self.fbeta(probs, labels)

        return output

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        result = self.fbeta.get_metric(reset)
        result["acc"] = self.accuracy.get_metric(reset)
        return result

    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor]
    ) -> Dict[str, torch.Tensor]:
        if len(output_dict) <= 0:
            return output_dict
        logits = output_dict["logits"]
        entailment_answer_index = logits.argmax(-1)
        entailment_answer = [
            self.vocab.get_token_from_index(int(i), "labels") for i in entailment_answer_index
        ]
        output_dict["entailment_answer"] = entailment_answer
        return output_dict

    default_predictor = "vilbert_ve"
class RationaleBaseModel(Model):
    def __init__(
        self,
        vocab: Vocabulary,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        super(RationaleBaseModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._f1_metric = FBetaMeasure()
        self._accuracy = CategoricalAccuracy()
        self.prediction_mode = False

        initializer(self)

    def forward(self, document, sentence_indices, query=None, labels=None, metadata=None):
        raise NotImplementedError

    def decode(self, output_dict):
        output_dict = self._decode(output_dict)
        return output_dict

    def _call_metrics(self, output_dict):
        self._f1_metric(output_dict['logits'], output_dict['gold_labels'])
        self._accuracy(output_dict['logits'], output_dict['gold_labels'])

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self._f1_metric.get_metric(reset)
        output_labels = self._vocabulary.get_index_to_token_vocabulary("labels")
        output_labels = [output_labels[i] for i in range(len(output_labels))]

        class_metrics = {}
        for k, v in metrics.items():
            assert len(v) == len(output_labels)
            class_nums = dict(zip(output_labels, v))
            class_metrics.update({k + "_" + str(kc): x for kc, x in class_nums.items()})

        class_metrics.update({"accuracy": self._accuracy.get_metric(reset)})

        modified_class_metrics = {}
        for k, v in class_metrics.items():
            if k.endswith('_1') or k == 'accuracy':
                modified_class_metrics[k] = v
            else:
                modified_class_metrics['_' + k] = v

        return modified_class_metrics

    def normalize_attentions(self, output_dict):
        '''
        In case attention is over subtokens rather than at the token level,
        combine subtoken attention into token attention.
        '''
        return output_dict
class NLIModel(Model):
    default_predictor = "NLIPredictor"

    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder = None,
                 encoder: Seq2VecEncoder = None,
                 dropout: float = 0.3):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder or BertCLSPooler(self.embedder.get_output_dim())
        self.dropout = nn.Dropout(dropout)
        num_classes = self.vocab.get_vocab_size("labels")
        assert num_classes > 0, "Wrong namespace for labels apparently"
        self.clf = nn.Linear(self.encoder.get_output_dim(), num_classes)

        self.accuracy = CategoricalAccuracy()
        assert num_classes == 2 or num_classes == 3
        labels = list(range(num_classes))
        self.f1 = FBetaMeasure(average=None, labels=labels)

    def forward(self,
                tokens: Dict[str, Dict[str, torch.LongTensor]],
                labels: torch.LongTensor = None,
                **kwargs) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(tokens)
        embedded = self.embedder(tokens)
        embedded = self.dropout(embedded)
        encoded_cls = self.encoder(embedded, mask)
        logits = self.clf(encoded_cls)
        # logits - (batch_size, num_classes)
        output_dict = {"logits": logits}
        if labels is not None:
            # labels - (batch_size,)
            labels = labels.view(-1)
            loss = cross_entropy(logits, labels)
            output_dict["loss"] = loss
            self.accuracy(logits, labels)
            self.f1(logits, labels)
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = {}
        acc: float = self.accuracy.get_metric(reset)
        metrics["accuracy"] = acc
        f1 = self.f1.get_metric(reset)
        for name, idx in self.vocab.get_token_to_index_vocabulary("labels").items():
            for metric_name, value in f1.items():
                metrics[name + "_" + metric_name] = value[idx]
        return metrics

    def make_output_human_readable(
            self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, Any]:
        return output_dict
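# A minimal sketch of how NLIModel.get_metrics flattens the per-label
# FBetaMeasure output. With average=None, get_metric returns one list per
# metric, indexed by label id; the label vocabulary and values below are
# hypothetical.
f1 = {"precision": [0.9, 0.7, 0.8], "recall": [0.8, 0.6, 0.9], "fscore": [0.85, 0.65, 0.85]}
label_vocab = {"entailment": 0, "contradiction": 1, "neutral": 2}
metrics = {}
for name, idx in label_vocab.items():
    for metric_name, value in f1.items():
        metrics[name + "_" + metric_name] = value[idx]
# metrics == {"entailment_precision": 0.9, "entailment_recall": 0.8, ...}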
def test_fbeta_handles_batch_size_of_one(self, device: str):
    predictions = torch.tensor([[0.2862, 0.3479, 0.1627, 0.2033]], device=device)
    targets = torch.tensor([1], device=device)
    mask = torch.tensor([True], device=device)

    fbeta = FBetaMeasure()
    fbeta(predictions, targets, mask)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]

    assert_allclose(precisions, [0.0, 1.0, 0.0, 0.0])
    assert_allclose(recalls, [0.0, 1.0, 0.0, 0.0])
def test_fbeta_handles_batch_size_of_one(self):
    predictions = torch.Tensor([[0.2862, 0.3479, 0.1627, 0.2033]])
    targets = torch.Tensor([1])
    mask = torch.Tensor([1])

    fbeta = FBetaMeasure()
    fbeta(predictions, targets, mask)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]

    numpy.testing.assert_almost_equal(precisions, [0.0, 1.0, 0.0, 0.0])
    numpy.testing.assert_almost_equal(recalls, [0.0, 1.0, 0.0, 0.0])
def test_fbeta_handles_no_prediction_true_all_class(self, device: str):
    predictions = torch.tensor([[0.65, 0.35], [0.0, 0.0]], device=device)
    # preds = [0, NA]
    targets = torch.tensor([1, 1], device=device)

    fbeta = FBetaMeasure()
    fbeta(predictions, targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    assert_allclose(precisions, [0.0, 0.0])
    assert_allclose(recalls, [0.0, 0.0])
    assert_allclose(fscores, [0.0, 0.0])
def test_fbeta_multiclass_with_explicit_labels(self):
    # same predictions, but with an explicit label ordering
    fbeta = FBetaMeasure(labels=[4, 3, 2, 1, 0])
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric['precision']
    recalls = metric['recall']
    fscores = metric['fscore']

    desired_precisions = self.desired_precisions[::-1]
    desired_recalls = self.desired_recalls[::-1]
    desired_fscores = self.desired_fscores[::-1]
    # check value
    numpy.testing.assert_almost_equal(precisions, desired_precisions, decimal=2)
    numpy.testing.assert_almost_equal(recalls, desired_recalls, decimal=2)
    numpy.testing.assert_almost_equal(fscores, desired_fscores, decimal=2)
def test_fbeta_multiclass_with_explicit_labels(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)
    # same predictions, but with an explicit label ordering
    fbeta = FBetaMeasure(labels=[4, 3, 2, 1, 0])
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    desired_precisions = self.desired_precisions[::-1]
    desired_recalls = self.desired_recalls[::-1]
    desired_fscores = self.desired_fscores[::-1]
    # check value
    assert_allclose(precisions, desired_precisions)
    assert_allclose(recalls, desired_recalls)
    assert_allclose(fscores, desired_fscores)
def test_fbeta_multiclass_with_macro_average(self):
    labels = [0, 1]
    fbeta = FBetaMeasure(average="macro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    macro_precision = numpy.array(self.desired_precisions)[labels].mean()
    macro_recall = numpy.array(self.desired_recalls)[labels].mean()
    macro_fscore = numpy.array(self.desired_fscores)[labels].mean()
    # check value
    numpy.testing.assert_almost_equal(precisions, macro_precision, decimal=2)
    numpy.testing.assert_almost_equal(recalls, macro_recall, decimal=2)
    numpy.testing.assert_almost_equal(fscores, macro_fscore, decimal=2)
def test_fbeta_multiclass_metric(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    fbeta = FBetaMeasure()
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    # check value
    assert_allclose(precisions, self.desired_precisions)
    assert_allclose(recalls, self.desired_recalls)
    assert_allclose(fscores, self.desired_fscores)

    # check type
    assert isinstance(precisions, List)
    assert isinstance(recalls, List)
    assert isinstance(fscores, List)
def test_fbeta_multiclass_with_macro_average(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)
    labels = [0, 1]
    fbeta = FBetaMeasure(average="macro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    # We keep the expected values on the CPU because FBetaMeasure returns its results on the CPU.
    macro_precision = torch.tensor(self.desired_precisions)[labels].mean()
    macro_recall = torch.tensor(self.desired_recalls)[labels].mean()
    macro_fscore = torch.tensor(self.desired_fscores)[labels].mean()

    # check value
    assert_allclose(precisions, macro_precision)
    assert_allclose(recalls, macro_recall)
    assert_allclose(fscores, macro_fscore)
def multiple_runs(
    global_rank: int,
    world_size: int,
    gpu_id: Union[int, torch.device],
    metric: FBetaMeasure,
    metric_kwargs: Dict[str, List[Any]],
    desired_values: Dict[str, Any],
    exact: Union[bool, Tuple[float, float]] = True,
):
    kwargs = {}
    # Use the arguments meant for the process with rank `global_rank`.
    for argname in metric_kwargs:
        kwargs[argname] = metric_kwargs[argname][global_rank]

    for i in range(200):
        metric(**kwargs)

    metric_values = metric.get_metric()
    for key in desired_values:
        assert_allclose(desired_values[key], metric_values[key])
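# A sketch of how `multiple_runs` might be driven. It assumes AllenNLP's
# distributed test harness `run_distributed_test` (from
# allennlp.common.testing.distributed_test), which spawns one worker per
# device id (-1 means CPU) and calls the given function with
# (global_rank, world_size, gpu_id, *args, **kwargs); the harness name
# and the per-rank shards below are assumptions for illustration.
import torch
from allennlp.common.testing.distributed_test import run_distributed_test
from allennlp.training.metrics import FBetaMeasure

# Each rank gets its own shard of the batch (hypothetical values).
predictions = [
    torch.tensor([[0.35, 0.25, 0.1, 0.1, 0.2], [0.1, 0.6, 0.1, 0.2, 0.0]]),
    torch.tensor([[0.1, 0.6, 0.1, 0.2, 0.0], [0.1, 0.5, 0.1, 0.2, 0.0]]),
]
targets = [torch.tensor([0, 4]), torch.tensor([1, 0])]
metric_kwargs = {"predictions": predictions, "gold_labels": targets}

# Feeding the same shard 200 times leaves precision/recall unchanged, so a
# single-process run over the concatenated shards yields the desired values.
single = FBetaMeasure()
single(torch.cat(predictions), torch.cat(targets))
desired_values = single.get_metric()

run_distributed_test([-1, -1], multiple_runs, FBetaMeasure(), metric_kwargs, desired_values, exact=False)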
def test_fbeta_multiclass_macro_average_metric(self):
    fbeta = FBetaMeasure(average='macro')
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric['precision']
    recalls = metric['recall']
    fscores = metric['fscore']

    macro_precision = numpy.mean(self.desired_precisions)
    macro_recall = numpy.mean(self.desired_recalls)
    macro_fscore = numpy.mean(self.desired_fscores)
    # check value
    numpy.testing.assert_almost_equal(precisions, macro_precision, decimal=2)
    numpy.testing.assert_almost_equal(recalls, macro_recall, decimal=2)
    numpy.testing.assert_almost_equal(fscores, macro_fscore, decimal=2)

    # check type
    assert isinstance(precisions, float)
    assert isinstance(recalls, float)
    assert isinstance(fscores, float)
def test_fbeta_multiclass_with_weighted_average(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)
    labels = [0, 1]
    fbeta = FBetaMeasure(average="weighted", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    weighted_precision, weighted_recall, weighted_fscore, _ = precision_recall_fscore_support(
        self.targets.cpu().numpy(),
        self.predictions.argmax(dim=1).cpu().numpy(),
        labels=labels,
        average="weighted",
    )

    # check value
    assert_allclose(precisions, weighted_precision)
    assert_allclose(recalls, weighted_recall)
    assert_allclose(fscores, weighted_fscore)
def test_fbeta_multiclass_metric(self):
    fbeta = FBetaMeasure()
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric['precision']
    recalls = metric['recall']
    fscores = metric['fscore']

    # check value
    numpy.testing.assert_almost_equal(precisions, self.desired_precisions, decimal=2)
    numpy.testing.assert_almost_equal(recalls, self.desired_recalls, decimal=2)
    numpy.testing.assert_almost_equal(fscores, self.desired_fscores, decimal=2)

    # check type
    assert isinstance(precisions, List)
    assert isinstance(recalls, List)
    assert isinstance(fscores, List)
class VisualEntailmentModel(VisionTextModel):
    """
    Model for the visual entailment task, based on the paper
    [Visual Entailment: A Novel Task for Fine-Grained Image Understanding]
    (https://api.semanticscholar.org/CorpusID:58981654).

    # Parameters

    vocab : `Vocabulary`
    text_embeddings : `TransformerEmbeddings`
    image_embeddings : `ImageFeatureEmbeddings`
    encoder : `BiModalEncoder`
    pooled_output_dim : `int`
    fusion_method : `str`, optional (default = `"sum"`)
    dropout : `float`, optional (default = `0.1`)
    label_namespace : `str`, optional (default = `labels`)
    """

    def __init__(
        self,
        vocab: Vocabulary,
        text_embeddings: TransformerEmbeddings,
        image_embeddings: ImageFeatureEmbeddings,
        encoder: BiModalEncoder,
        pooled_output_dim: int,
        fusion_method: str = "sum",
        dropout: float = 0.1,
        label_namespace: str = "labels",
        *,
        ignore_text: bool = False,
        ignore_image: bool = False,
    ) -> None:
        super().__init__(
            vocab,
            text_embeddings,
            image_embeddings,
            encoder,
            pooled_output_dim,
            fusion_method,
            dropout,
            label_namespace,
            is_multilabel=False,
        )

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    @overrides
    def forward(
        self,  # type: ignore
        box_features: torch.Tensor,
        box_coordinates: torch.Tensor,
        box_mask: torch.Tensor,
        hypothesis: TextFieldTensors,
        labels: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        return super().forward(
            box_features,
            box_coordinates,
            box_mask,
            text=hypothesis,
            labels=labels,
            label_weights=None,
        )

    @overrides
    def _compute_loss_and_metrics(
        self,
        batch_size: int,
        outputs: torch.Tensor,
        label: torch.Tensor,
        label_weights: Optional[torch.Tensor] = None,
    ):
        assert label_weights is None
        if label is not None:
            outputs["loss"] = (
                torch.nn.functional.cross_entropy(outputs["logits"], label) / batch_size
            )
            self.accuracy(outputs["logits"], label)
            self.fbeta(outputs["probs"], label)
        return outputs

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self.fbeta.get_metric(reset)
        accuracy = self.accuracy.get_metric(reset)
        metrics.update({"accuracy": accuracy})
        return metrics

    @overrides
    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor]
    ) -> Dict[str, torch.Tensor]:
        batch_labels = []
        for batch_index, batch in enumerate(output_dict["probs"]):
            labels = np.argmax(batch, axis=-1)
            batch_labels.append(labels)
        output_dict["labels"] = batch_labels
        return output_dict

    default_predictor = "vilbert_ve"
class Nlvr2Model(VisionTextModel):
    """
    Model for the NLVR2 task, based on the paper
    [A Corpus for Reasoning About Natural Language Grounded in Photographs]
    (https://api.semanticscholar.org/CorpusID:53178856).

    # Parameters

    vocab : `Vocabulary`
    text_embeddings : `TransformerEmbeddings`
    image_embeddings : `ImageFeatureEmbeddings`
    encoder : `BiModalEncoder`
    pooled_output_dim : `int`
    fusion_method : `str`, optional (default = `"mul"`)
    dropout : `float`, optional (default = `0.1`)
    label_namespace : `str`, optional (default = `labels`)
    """

    def __init__(
        self,
        vocab: Vocabulary,
        text_embeddings: TransformerEmbeddings,
        image_embeddings: ImageFeatureEmbeddings,
        encoder: BiModalEncoder,
        pooled_output_dim: int,
        fusion_method: str = "mul",
        dropout: float = 0.1,
        label_namespace: str = "labels",
        *,
        ignore_text: bool = False,
        ignore_image: bool = False,
    ) -> None:
        super().__init__(
            vocab,
            text_embeddings,
            image_embeddings,
            encoder,
            pooled_output_dim,
            fusion_method,
            dropout,
            label_namespace,
            is_multilabel=False,
        )

        self.pooled_output_dim = pooled_output_dim

        self.layer1 = torch.nn.Linear(pooled_output_dim * 2, pooled_output_dim)
        self.layer2 = torch.nn.Linear(pooled_output_dim, 2)
        self.activation = torch.nn.ReLU()

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    def forward(
        self,  # type: ignore
        box_features: torch.Tensor,
        box_coordinates: torch.Tensor,
        box_mask: torch.Tensor,
        hypothesis: TextFieldTensors,
        label: Optional[torch.Tensor] = None,
        identifier: List[Dict[str, Any]] = None,
    ) -> Dict[str, torch.Tensor]:
        batch_size = box_features.shape[0]

        pooled_outputs = self.backbone(
            box_features, box_coordinates, box_mask, hypothesis
        )["pooled_boxes_and_text"].transpose(0, 1)

        hidden = self.layer1(torch.cat((pooled_outputs[0], pooled_outputs[1]), dim=-1))

        # Shape: (batch_size, num_labels)
        logits = self.layer2(self.activation(hidden))

        # Shape: (batch_size, num_labels)
        probs = torch.softmax(logits, dim=-1)

        outputs = {"logits": logits, "probs": probs}
        outputs = self._compute_loss_and_metrics(batch_size, outputs, label)

        return outputs

    def _compute_loss_and_metrics(
        self,
        batch_size: int,
        outputs: Dict[str, torch.Tensor],
        label: torch.Tensor,
        label_weights: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        if label_weights is not None:
            raise NotImplementedError("This implementation does not support label_weights.")
        if label is not None:
            outputs["loss"] = (
                torch.nn.functional.cross_entropy(outputs["logits"], label) / batch_size
            )
            self.accuracy(outputs["logits"], label)
            self.fbeta(outputs["probs"], label)
        return outputs

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self.fbeta.get_metric(reset)
        accuracy = self.accuracy.get_metric(reset)
        metrics.update({"accuracy": accuracy})
        return metrics

    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor]
    ) -> Dict[str, torch.Tensor]:
        batch_labels = []
        for batch_index, batch in enumerate(output_dict["probs"]):
            labels = np.argmax(batch, axis=-1)
            batch_labels.append(labels)
        output_dict["labels"] = batch_labels
        return output_dict

    default_predictor = "nlvr2"
class Nlvr2Head(Head):
    def __init__(self, vocab: Vocabulary, embedding_dim: int, label_namespace: str = "labels"):
        super().__init__(vocab)

        self.label_namespace = label_namespace

        self.layer1 = torch.nn.Linear(embedding_dim * 2, embedding_dim)
        self.layer2 = torch.nn.Linear(embedding_dim, 2)
        self.activation = torch.nn.ReLU()

        from allennlp.training.metrics import CategoricalAccuracy
        from allennlp.training.metrics import FBetaMeasure

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    def forward(
        self,  # type: ignore
        encoded_boxes: torch.Tensor,
        encoded_boxes_mask: torch.Tensor,
        encoded_boxes_pooled: torch.Tensor,
        encoded_text: torch.Tensor,
        encoded_text_mask: torch.Tensor,
        encoded_text_pooled: torch.Tensor,
        pooled_boxes_and_text: torch.Tensor,
        label: Optional[torch.Tensor] = None,
        label_weights: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        pooled_boxes_and_text = pooled_boxes_and_text.transpose(0, 1)
        hidden = self.layer1(
            torch.cat((pooled_boxes_and_text[0], pooled_boxes_and_text[1]), dim=-1)
        )
        logits = self.layer2(self.activation(hidden))
        probs = torch.softmax(logits, dim=-1)

        output = {"logits": logits, "probs": probs}

        assert label_weights is None
        if label is not None:
            output["loss"] = torch.nn.functional.cross_entropy(logits, label) / logits.size(0)
            self.accuracy(logits, label)
            self.fbeta(probs, label)

        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        result = self.fbeta.get_metric(reset)
        result["accuracy"] = self.accuracy.get_metric(reset)
        return result

    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor]
    ) -> Dict[str, torch.Tensor]:
        if len(output_dict) <= 0:
            return output_dict
        logits = output_dict["logits"]
        entailment_answer_index = logits.argmax(-1)
        entailment_answer = [
            self.vocab.get_token_from_index(int(i), "labels") for i in entailment_answer_index
        ]
        output_dict["entailment_answer"] = entailment_answer
        return output_dict

    default_predictor = "nlvr2"
class PrePruner(Model):
    def __init__(self,
                 vocab,
                 feature_size: int,
                 max_span_width: int,
                 keep_rate: float,
                 mlp_dropout: float = 0.4,
                 embedder_type=None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PrePruner, self).__init__(vocab, regularizer)
        self.keep_rate = keep_rate
        self.embedder = get_embeddings(embedder_type, self.vocab)
        self.ffn = FeedForward(300, 2, 300, F.relu, 0.5)
        embedding_dim = self.embedder.get_output_dim()
        self._span_extractor = PoolingSpanExtractor(
            embedding_dim,
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)
        entity_feedforward = FeedForward(self._span_extractor.get_output_dim(), 2, 150,
                                         F.relu, mlp_dropout)
        self.feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(entity_feedforward),
            TimeDistributed(torch.nn.Linear(entity_feedforward.get_output_dim(), 1)),
        )
        self._lexical_dropout = torch.nn.Dropout(p=0.1)
        self.loss = torch.nn.BCELoss()
        self._metric_f1 = FBetaMeasure()

    def forward(self,
                text: Dict[str, torch.LongTensor],
                spans: torch.IntTensor,
                labels: torch.IntTensor = None,
                **kwargs):
        text_embeddings = self._lexical_dropout(self.embedder(text))
        # Shape: (batch_size, num_spans)
        span_mask = (spans[:, :, 0] >= 0).squeeze(-1).float()
        spans = F.relu(spans.float()).long()
        span_embeddings = self._span_extractor(text_embeddings, spans, span_indices_mask=span_mask)
        span_scores = self.feedforward_scorer(span_embeddings)
        span_scores = span_scores.squeeze(-1)
        span_scores += span_mask.log()
        span_scores = span_scores.sigmoid()
        topk_idx = torch.topk(span_scores, int(self.keep_rate * spans.shape[1]))[-1]
        predict_true = span_scores.new_zeros(span_scores.shape).scatter_(1, topk_idx, 1).bool()

        is_entity = (labels != 0).float()
        span_scores = span_scores.reshape(-1)
        is_entity = is_entity.reshape(-1)
        loss = self.loss(span_scores, is_entity)

        predict_true_flatten = predict_true.reshape(-1)
        predict_true_flatten = predict_true_flatten.unsqueeze(-1)
        predict_false_flatten = ~predict_true_flatten
        predict = torch.cat([predict_false_flatten, predict_true_flatten], -1)
        self._metric_f1(predict, is_entity, mask=span_mask.reshape(-1))

        predict_true |= labels.bool()
        output_dict = {"loss": loss, "predict_true": predict_true}
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False):
        metric = self._metric_f1.get_metric(reset)
        metric['precision'] = metric['precision'][1]
        metric['recall'] = metric['recall'][1]
        metric['fscore'] = metric['fscore'][1]
        return metric
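# The two-column trick PrePruner.forward uses — turning a boolean keep/drop
# decision into a two-class one-hot "distribution" so the default per-label
# FBetaMeasure can score a binary decision — shown in isolation. A minimal
# sketch with hypothetical values; class 1 is the "keep" class, which
# get_metrics above selects.
import torch
from allennlp.training.metrics import FBetaMeasure

predict_true = torch.tensor([[True, False, True]])  # hypothetical model decisions
is_entity = torch.tensor([[1.0, 0.0, 0.0]])         # hypothetical gold binary labels
# Shape: (batch_size, num_spans, 2); column 1 is "keep".
predict = torch.stack([~predict_true, predict_true], dim=-1).float()

f1 = FBetaMeasure()
f1(predict, is_entity)
metric = f1.get_metric()
precision_keep = metric['precision'][1]  # precision of the positive class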
class Seq2SeqKnu(SimpleSeq2Seq):
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 target_namespace,
                 decoder,
                 attention,
                 max_decoding_steps: int,
                 cuda_device: int):
        super().__init__(vocab, source_embedder, encoder, max_decoding_steps, use_bleu=False)
        self._decoder = decoder
        self._attention = attention
        self.acc = CategoricalAccuracy()
        self.label_acc = BooleanAccuracy()
        self.f1 = FBetaMeasure(average="macro")
        self.cuda_device = cuda_device
        self._target_namespace = target_namespace
        num_classes = self.vocab.get_vocab_size(self._target_namespace)
        # hidden is the concatenation of decoder_hidden, attended_output, and
        # encoder_slice, hence the * 3.
        self._output_projection_layer = Linear(self._decoder_output_dim * 3, num_classes)

    def forward(self,
                source_tokens: Dict[str, torch.LongTensor],
                gold_mentions,
                target_tokens: Dict[str, torch.LongTensor] = None) -> Dict[str, torch.Tensor]:
        """
        :param source_tokens: the serialized source sentence
        :param gold_mentions: which positions are mentions
        :param target_tokens: the target for each mention
        :return:
        """
        # (batch_size, max_sentence_length, embedding_dim)
        state = self._encode(source_tokens)
        output_dict = self._forward_loop(state, gold_mentions, target_tokens)

        if not self.training:
            if target_tokens:
                logits = output_dict['logits']
                mention_mask = output_dict['mention_mask']
                target = target_tokens['tokens']
                predictions = output_dict['predictions']
                class_probs = output_dict['class_probs']
                self.label_acc(predictions, target, mention_mask)
                self.acc(logits, target, mention_mask)
                self.f1(class_probs, target, mention_mask)

        return output_dict

    def _forward_loop(self,
                      state: Dict[str, torch.Tensor],
                      gold_mentions: torch.LongTensor,
                      target_tokens: Dict[str, torch.LongTensor] = None) -> Dict[str, torch.Tensor]:
        # shape: (batch_size, max_input_sequence_length)
        source_mask = state["source_mask"]
        # shape: (batch_size, max_input_sequence_length, embedding_dim)
        encoder_outputs = state['encoder_outputs']
        batch_size = source_mask.size()[0]
        max_input_sequence_length = source_mask.size()[1]

        # The next two steps zero-pad gold_mentions out to
        # (batch_size, max_input_sequence_length).
        gold_mentions_expanded = torch.zeros(batch_size, max_input_sequence_length).cuda(self.cuda_device)
        gold_mentions_expanded[:, :gold_mentions.size()[1]] = gold_mentions
        # Via get_text_field_mask, get a 0-1 mask of which positions are valid.
        # shape: (batch_size, max_input_sequence_length)
        mention_mask = util.get_text_field_mask({'gold_mentions': gold_mentions_expanded})

        for b in range(batch_size):
            encoder_output = encoder_outputs[b]
            gold_mention = gold_mentions_expanded[b]
            # Select the outputs at the mention positions and fill the remainder
            # with the output at position 0. E.g. for gold_mention = [3, 5, 0, 0],
            # pick the outputs at positions 3 and 5 and pad the rest of the matrix
            # with the output at position 0.
            encoder_selected = torch.index_select(encoder_output, 0, gold_mention.long())
            if b == 0:
                encoder_resorted = encoder_selected.unsqueeze(0)
            else:
                encoder_resorted = torch.cat((encoder_resorted, encoder_selected.unsqueeze(0)), 0)

        # Run the decoder.
        # shape: (batch_size, max_sentence_length, num_classes)
        decoder_outputs = self._decode(encoder_resorted, mention_mask)

        # Compute token by token.
        token_logits = []
        token_predictions = []
        token_class_probs = []
        for i in range(max_input_sequence_length):
            encoder_slice = encoder_resorted[:, i, :]
            decoder_hidden = decoder_outputs[:, i, :]
            # source_mask_slice = source_mask[:, i].float()

            # TODO: the decoder hidden state needs h_encoder_t concatenated onto it.
            encoder_weights = self._attention(decoder_hidden, encoder_outputs, source_mask.float())
            # Weighted sum.
            # shape: (batch_size, hidden_dim)
            attended_output = util.weighted_sum(encoder_outputs, encoder_weights)
            # shape: (batch_size, hidden_dim * 3)
            hidden_attention_cat = torch.cat((decoder_hidden, attended_output, encoder_slice), -1)
            # shape: (batch_size, num_classes)
            score = self._output_projection_layer(hidden_attention_cat)
            token_logits.append(score.unsqueeze(1))

            class_probabilities = F.softmax(score, dim=-1)
            token_class_probs.append(class_probabilities.unsqueeze(1))

            # shape (predicted_classes): (batch_size,)
            _, predicted_classes = torch.max(class_probabilities, 1)
            last_predictions = predicted_classes
            token_predictions.append(last_predictions.unsqueeze(1))

        predictions = torch.cat(token_predictions, 1)
        class_probs = torch.cat(token_class_probs, 1)
        output_dict = {'predictions': predictions, 'class_probs': class_probs.detach()}

        if target_tokens:
            targets = target_tokens['tokens']
            target_length = targets.size()[1]
            # The steps below mainly do trimming: the output has shape
            # (batch_size, max_sentence_length, num_classes) while the target is
            # (batch_size, max_target_length), and the two lengths differ.
            predictions_slice = predictions[:, :target_length]
            class_probs_slice = class_probs[:, :target_length, :]
            output_dict['predictions'] = predictions_slice
            output_dict['class_probs'] = class_probs_slice

            logits = torch.cat(token_logits, 1)
            # Trim anything beyond the target length.
            logits_slice = logits[:, :target_length, :].contiguous()
            targets = targets.contiguous()
            mention_mask = mention_mask[:, :target_length].contiguous()
            loss = util.sequence_cross_entropy_with_logits(
                logits_slice.float(), targets, mention_mask.float())
            output_dict['loss'] = loss
            output_dict['logits'] = logits_slice
            output_dict['mention_mask'] = mention_mask

        return output_dict

    def _decode(self, encoder_output, decode_mask):
        decoder_outputs = self._decoder(encoder_output, decode_mask)
        return decoder_outputs

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        all_metrics: Dict[str, float] = {}
        if not self.training:
            all_metrics.update({'accuracy': self.acc.get_metric(reset=reset)})
            all_metrics.update({'label_accuracy': self.label_acc.get_metric(reset=reset)})
            all_metrics.update({'f1': self.f1.get_metric(reset=reset)['fscore']})
        return all_metrics
class MaxMarginConditionalClassificationModel(MaxMarginConditionalModel):
    def __init__(
            self,
            num_entities: int,
            num_relations: int,
            embedding_dim: int,
            box_type: str = 'SigmoidBoxTensor',
            single_box: bool = False,
            softbox_temp: float = 10.,
            margin: float = 0.0,
            number_of_negative_samples: int = 0,
            debug: bool = False,
            regularization_weight: float = 0,
            init_interval_center: float = 0.25,
            init_interval_delta: float = 0.1,
            # adversarial_negative: bool = False,
            # adv_neg_softmax_temp: float = 0.8
    ) -> None:
        super().__init__(num_entities, num_relations, embedding_dim, box_type,
                         single_box, softbox_temp, margin,
                         number_of_negative_samples, debug,
                         regularization_weight, init_interval_center,
                         init_interval_delta)
        self.train_f1 = FBetaMeasure(average='micro')
        # self.valid_f1 = FBetaMeasure(average='micro')
        self.threshold_with_f1 = F1WithThreshold(flip_sign=True)
        self.istest = False
        self.test_threshold = None
        self.test_f1 = F1Measure(positive_label=1)

    def is_test(self) -> bool:
        if (not self.is_eval()) and self.istest:
            raise RuntimeError("test flag is true but eval is false")
        return self.is_eval() and self.istest

    def test(self) -> None:
        if not self.is_eval():
            raise RuntimeError("test flag is true but eval is false")
        self.istest = True

    def get_ranks(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.is_test():
            return self.get_test(embeddings)

        s = self._get_triple_score(embeddings['h'], embeddings['t'], embeddings['r'])
        # preds = torch.stack((p_s, n_s), dim=1)  # shape = (batch, 2)
        # self.valid_f1(preds, labels)
        labels = embeddings['label']
        # update the metrics
        self.threshold_with_f1(s, labels)

        return {}

    def get_test(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.test_threshold is None:
            raise RuntimeError("test_threshold should be set")
        s = self._get_triple_score(embeddings['h'], embeddings['t'], embeddings['r'])
        labels = embeddings['label']
        pos_prediction = (s > self.test_threshold).float()
        neg_prediction = 1.0 - pos_prediction
        predictions = torch.stack((neg_prediction, pos_prediction), -1)
        self.test_f1(predictions, labels)

        return {}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        if self.is_eval():
            if not self.istest:
                metrics = self.threshold_with_f1.get_metric(reset)
            else:
                p, r, f = self.test_f1.get_metric(reset)
                metrics = {'precision': p, 'recall': r, 'fscore': f}
        else:
            metrics = self.train_f1.get_metric(reset)
            metrics['regularization_loss'] = self.regularization_loss.get_metric(reset)

        return metrics

    def get_box_embeddings_val(self, h: torch.Tensor, t: torch.Tensor,
                               r: torch.Tensor, label: torch.Tensor) -> Dict[str, BoxTensor]:
        return BaseBoxModel.get_box_embeddings_val(self, h=h, t=t, r=r, label=label)

    def get_loss(self, scores: Tuple[torch.Tensor, torch.Tensor],
                 label: torch.Tensor) -> torch.Tensor:
        # max margin loss expects label to be float
        label = label.to(scores[0].dtype)
        loss = self.loss_f(*scores, label) + self.get_regularization_penalty()
        # metrics require 0,1 labels
        if not self.is_eval():
            with torch.no_grad():
                labels = torch.zeros_like(scores[0]).reshape(-1)  # shape = (batch,)
                preds = torch.stack(scores, dim=1)
                self.train_f1(preds, labels)

        return loss
class BCEBoxClassificationModel(BCEBoxModel):
    def __init__(self,
                 num_entities: int,
                 num_relations: int,
                 embedding_dim: int,
                 box_type: str = 'SigmoidBoxTensor',
                 single_box: bool = False,
                 softbox_temp: float = 10.,
                 number_of_negative_samples: int = 0,
                 debug: bool = False,
                 regularization_weight: float = 0,
                 init_interval_center: float = 0.25,
                 init_interval_delta: float = 0.1,
                 neg_samples_in_dataset_reader: int = 0) -> None:
        super().__init__(
            num_entities,
            num_relations,
            embedding_dim,
            box_type=box_type,
            single_box=single_box,
            softbox_temp=softbox_temp,
            number_of_negative_samples=number_of_negative_samples,
            debug=debug,
            regularization_weight=regularization_weight,
            init_interval_center=init_interval_center,
            init_interval_delta=init_interval_delta,
            neg_samples_in_dataset_reader=neg_samples_in_dataset_reader)
        self.train_f1 = FBetaMeasure(average='micro')
        # self.valid_f1 = FBetaMeasure(average='micro')
        self.threshold_with_f1 = F1WithThreshold(flip_sign=True)
        self.istest = False
        self.test_threshold = None
        # self.test_f1 = FBetaMeasure(average='macro')
        self.test_f1 = F1Measure(positive_label=1)

    def is_test(self) -> bool:
        if (not self.is_eval()) and self.istest:
            raise RuntimeError("test flag is true but eval is false")
        return self.is_eval() and self.istest

    def test(self) -> None:
        if not self.is_eval():
            raise RuntimeError("test flag is true but eval is false")
        self.istest = True

    def get_box_embeddings_val(self, h: torch.Tensor, t: torch.Tensor,
                               r: torch.Tensor, label: torch.Tensor) -> Dict[str, BoxTensor]:
        return BaseBoxModel.get_box_embeddings_val(self, h=h, t=t, r=r, label=label)

    def get_loss(self, scores: torch.Tensor, label: torch.Tensor) -> torch.Tensor:
        log_p = scores
        log1mp = log1mexp(log_p)
        logits = torch.stack([log1mp, log_p], dim=-1)
        loss = self.loss_f(logits, label) + self.get_regularization_penalty()

        if torch.isnan(loss).any():
            breakpoint()

        if not self.is_eval():
            with torch.no_grad():
                self.train_f1(logits, label)

        return loss

    def get_ranks(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.is_test():
            return self.get_test(embeddings)

        s = self._get_triple_score(embeddings['h'], embeddings['t'], embeddings['r'])
        # preds = torch.stack((p_s, n_s), dim=1)  # shape = (batch, 2)
        # self.valid_f1(preds, labels)
        labels = embeddings['label']
        # update the metrics
        self.threshold_with_f1(s, labels)

        return {}

    def get_test(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.test_threshold is None:
            raise RuntimeError("test_threshold should be set")
        s = self._get_triple_score(embeddings['h'], embeddings['t'], embeddings['r'])
        labels = embeddings['label']
        pos_prediction = (s > self.test_threshold).float()
        neg_prediction = 1.0 - pos_prediction
        predictions = torch.stack((neg_prediction, pos_prediction), -1)
        self.test_f1(predictions, labels)

        return {}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        if self.is_eval():
            if not self.istest:
                metrics = self.threshold_with_f1.get_metric(reset)
            else:
                p, r, f = self.test_f1.get_metric(reset)
                metrics = {'precision': p, 'recall': r, 'fscore': f}
        else:
            metrics = self.train_f1.get_metric(reset)
            metrics['regularization_loss'] = self.regularization_loss.get_metric(reset)

        return metrics

    def get_regularization_penalty(self) -> Union[float, torch.Tensor]:
        if self.is_eval():
            return 0.0

        if self.regularization_weight > 0:
            all_ = self.h.all_boxes
            deltas = all_.Z - all_.z
            with torch.no_grad():
                assert (deltas >= 0.0).all()
            penalty = self.regularization_weight * torch.sum(deltas)
            # track the reg loss
            self.regularization_loss(penalty.item())
            return penalty
        else:
            return 0.0
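# The two-column trick used in get_loss above, in isolation: a scalar
# log-probability score log p(positive) is turned into per-class "logits"
# [log(1-p), log p] so that a cross-entropy-style loss and
# FBetaMeasure(average='micro') can consume it. The original log1mexp
# helper is defined elsewhere in that repository; the stand-in below is a
# minimal sketch using Mächler's numerically stable formulation of
# log(1 - exp(x)) for x <= 0.
import math
import torch

def log1mexp(x: torch.Tensor) -> torch.Tensor:
    # For x > -log(2), 1 - exp(x) is small, so log(-expm1(x)) is accurate;
    # otherwise exp(x) is small and log1p(-exp(x)) is accurate.
    switch = x > -math.log(2.0)
    return torch.where(switch, torch.log(-torch.expm1(x)), torch.log1p(-torch.exp(x)))

log_p = torch.log(torch.tensor([0.9, 0.2, 0.6]))  # hypothetical positive scores
logits = torch.stack([log1mexp(log_p), log_p], dim=-1)  # shape: (batch, 2)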
class RationaleBaseModel(Model):
    def __init__(
        self,
        vocab: Vocabulary,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        super(RationaleBaseModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._f1_metric = FBetaMeasure()
        self._accuracy = CategoricalAccuracy()
        self.prediction_mode = False

        initializer(self)

    def forward(self, document, query=None, labels=None, metadata=None, **kwargs):
        # pylint: disable=arguments-differ
        raise NotImplementedError

    def decode(self, output_dict):
        output_dict = self._decode(output_dict)
        output_labels = self._vocabulary.get_index_to_token_vocabulary("labels")
        predicted_labels, gold_labels = [], []
        for p, g in zip(output_dict["predicted_label"], output_dict["label"]):
            predicted_labels.append(output_labels[int(p)])
            gold_labels.append(output_labels[int(g)])

        output_dict["predicted_label"] = predicted_labels
        output_dict["label"] = gold_labels
        output_dict["annotation_id"] = [d['annotation_id'] for d in output_dict['metadata']]
        del output_dict['metadata']

        return output_dict

    def _call_metrics(self, output_dict):
        self._f1_metric(output_dict["probs"], output_dict["gold_labels"])
        self._accuracy(output_dict["probs"], output_dict["gold_labels"])

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self._f1_metric.get_metric(reset)
        macro_avg = {'macro_' + k: sum(v) / len(v) for k, v in metrics.items()}
        output_labels = self._vocabulary.get_index_to_token_vocabulary("labels")
        output_labels = [output_labels[i] for i in range(len(output_labels))]

        class_metrics = {}
        for k, v in metrics.items():
            assert len(v) == len(output_labels)
            class_nums = dict(zip(output_labels, v))
            class_metrics.update({k + "_" + str(kc): x for kc, x in class_nums.items()})

        class_metrics.update({"accuracy": self._accuracy.get_metric(reset)})
        class_metrics.update(macro_avg)

        modified_class_metrics = {}
        for k, v in class_metrics.items():
            if k in ["accuracy", "macro_fscore"]:
                modified_class_metrics[k] = v
            else:
                modified_class_metrics["_" + k] = v

        modified_class_metrics["validation_metric"] = class_metrics["macro_fscore"]

        return modified_class_metrics

    def normalize_attentions(self, output_dict):
        """
        In case attention is over subtokens rather than at the token level,
        combine subtoken attention into token attention.
        """
        return output_dict

    def combine_document_query(self, document, query):
        reader = document[0]["reader_object"]
        device = next(self.parameters()).device
        return {
            k: ({x: y.to(device) for x, y in v.items()} if type(v) == dict else v.to(device))
            for k, v in reader.combine_document_query(document, query, self._vocabulary).items()
        }
class SimpleTagger(Model):
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 dropout: float = 0.1,
                 ff_dim: int = 100):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        assert self.embedder.get_output_dim() == self.encoder.get_input_dim()
        self.feedforward = FeedForward(
            encoder.get_output_dim(),
            1,
            hidden_dims=ff_dim,
            activations=Activation.by_name('relu')(),
            dropout=dropout)
        self.out = torch.nn.Linear(
            in_features=self.feedforward.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))
        self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))
        self.f1 = FBetaMeasure(average='micro')
        self.accuracy = CategoricalAccuracy()
        self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')

    def forward(self,
                tokens: Dict[str, torch.Tensor],
                tags: torch.Tensor) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(tokens)
        embeddings = self.embedder(tokens)
        encoder_out = self.encoder(embeddings, mask)
        encoder_out = self.feedforward(encoder_out)
        logits = self.out(encoder_out)
        output = {"logits": logits, "mask": mask}

        if tags is not None:
            self.accuracy(logits, tags, mask)
            self.f1(logits, tags, mask)
            output['loss'] = -self.crf(logits, tags, mask)
        else:
            output["logits"] = self.crf.viterbi_tags(logits, mask)

        return output

    def decode(self, output_dict: Dict[str, torch.Tensor]):
        logits = output_dict["logits"]
        mask = output_dict["mask"]
        tag_logits = torch.argmax(logits, dim=2).tolist()
        lengths = torch.sum(mask, dim=1).tolist()
        all_labels = []
        for sample_num in range(len(tag_logits)):
            labels = []
            for label_idx in range(lengths[sample_num]):
                labels.append(self.idx_to_label[tag_logits[sample_num][label_idx]])
            all_labels.append(labels)
        return {"labels": all_labels}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        out = {"accuracy": self.accuracy.get_metric(reset)}
        out.update(self.f1.get_metric(reset))
        return out
class ClassificationModel(Model, ABC, metaclass=ABCMeta):
    def __init__(
            self,
            vocab: Vocabulary,
            classification_type: str = 'multi-class',
            pos_label: str = None,
            threshold: float = 0.5,
            neg_weight: float = 1.0,
            label_namespace: str = 'labels',
            regularizer: Optional[RegularizerApplicator] = None,
    ):
        super().__init__(vocab, regularizer)
        self._classification_type = classification_type
        self._label_namespace = label_namespace
        self._threshold = threshold
        self._neg_weight = neg_weight
        self._pos_label_index = vocab.get_token_index(
            pos_label, namespace=label_namespace) if pos_label else None
        self._use_threshold = False

        if self._classification_type == "ce":
            self._loss = torch.nn.CrossEntropyLoss()
            self._accuracy = CategoricalAccuracy()
            if self._pos_label_index is not None:
                self._f1 = FBetaMeasure(average=None)
            else:
                self._f1 = FBetaMeasure(average='micro')
        elif self._classification_type == "bce":
            # BCE: can an all-negative sample be specified?
            assert self._pos_label_index is None
            self._loss = torch.nn.BCEWithLogitsLoss()
            self._accuracy = BooleanAccuracy()
            self._f1 = BooleanF1()
            self._use_threshold = True
        elif self._classification_type == "as":
            # AS requires _pos_label_index to be given.
            assert self._pos_label_index is not None
            self._loss = AdaptiveScalingLossLayer(
                num_label=vocab.get_vocab_size(label_namespace),
                positive_idx=[self._pos_label_index])
            self._accuracy = CategoricalAccuracy()
            self._f1 = FBetaMeasure(average=None)
        else:
            raise NotImplementedError(
                'Classification Type Not Implemented: %s' % self._classification_type)

    def get_output_dict(self, logits, label=None, metadata=None):
        if self._use_threshold:
            probs = torch.sigmoid(logits)
        else:
            probs = torch.nn.functional.softmax(logits, dim=-1)

        output_dict = {
            "logits": logits,
            "probs": probs,
        }
        if metadata:
            output_dict["metadata"] = metadata

        if label is not None:
            if self._use_threshold:
                loss = self._loss(logits, label.float())
                self._accuracy(logits > 0.5, label.bool())
                self._f1(logits > 0.5, label.bool())
                output_dict['loss'] = loss
            else:
                loss = self._loss(logits, label)
                # _, pred = torch.max(logits, -1)
                self._accuracy(logits, label)
                self._f1(logits, label)
                output_dict['loss'] = loss

        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self._f1.get_metric(reset)
        if self._pos_label_index is not None:
            # 0 is None
            metrics = {
                key: value[self._pos_label_index]
                for key, value in metrics.items()
            }
        accuracy = self._accuracy.get_metric(reset)
        metrics.update({'accuracy': accuracy})
        metrics['precision'] = metrics['precision'] * 100
        metrics['recall'] = metrics['recall'] * 100
        metrics['fscore'] = metrics['fscore'] * 100
        metrics['accuracy'] = metrics['accuracy'] * 100
        return metrics

    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Decode probs to a label.

        If classification_type is ``multi-class``, does a simple argmax over
        the probabilities; if classification_type is ``multi-label``, does a
        simple threshold filter over the probabilities. Converts the index to
        a string label and adds a ``"label"`` key to the dictionary with the
        result.
        """
        predictions = output_dict["probs"].cpu()
        if self._use_threshold:
            if predictions.dim() == 2:
                predictions_list = [predictions[i] for i in range(predictions.shape[0])]
            else:
                predictions_list = [predictions]
            classes = []
            for prediction in predictions_list:
                label_str = list()
                for label_idx, predict in enumerate(prediction > self._threshold):
                    if not predict:
                        continue
                    label_str += [
                        self.vocab.get_token_from_index(
                            label_idx, namespace=self._label_namespace)
                    ]
                classes.append(label_str)
            output_dict["label"] = classes
        else:
            if predictions.dim() == 2:
                predictions_list = [predictions[i] for i in range(predictions.shape[0])]
            else:
                predictions_list = [predictions]
            classes = []
            for prediction in predictions_list:
                label_idx = prediction.argmax(dim=-1).item()
                label_str = self.vocab.get_token_from_index(
                    label_idx, namespace=self._label_namespace)
                classes.append(label_str)
            output_dict["label"] = classes
        return output_dict