    def test_fbeta_multiclass_with_mask(self):
        mask = torch.Tensor([1, 1, 1, 1, 1, 0])

        fbeta = FBetaMeasure()
        fbeta(self.predictions, self.targets, mask)
        metric = fbeta.get_metric()
        precisions = metric['precision']
        recalls = metric['recall']
        fscores = metric['fscore']

        numpy.testing.assert_almost_equal(fbeta._pred_sum.tolist(),
                                          [1, 3, 0, 1, 0])
        numpy.testing.assert_almost_equal(fbeta._true_sum.tolist(),
                                          [2, 1, 0, 1, 1])
        numpy.testing.assert_almost_equal(fbeta._true_positive_sum.tolist(),
                                          [1, 1, 0, 1, 0])

        desired_precisions = [1.00, 0.33, 0.00, 1.00, 0.00]
        desired_recalls = [0.50, 1.00, 0.00, 1.00, 0.00]
        desired_fscores = [(2 * p * r) / (p + r) if p + r != 0.0 else 0.0
                           for p, r in zip(desired_precisions, desired_recalls)
                           ]
        numpy.testing.assert_almost_equal(precisions,
                                          desired_precisions,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls, desired_recalls, decimal=2)
        numpy.testing.assert_almost_equal(fscores, desired_fscores, decimal=2)
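These tests refer to self.predictions, self.targets, and the self.desired_* lists defined in the surrounding test fixture, which is not shown here. A setup consistent with the counts asserted above (assumed to mirror the AllenNLP FBetaMeasureTest fixture; treat it as a sketch rather than the original code) would look roughly like this:

    def setup_method(self):
        # Predicted classes per row (argmax): [0, 1, 1, 1, 3, 1]
        self.predictions = torch.tensor(
            [
                [0.35, 0.25, 0.1, 0.1, 0.2],
                [0.1, 0.6, 0.1, 0.2, 0.0],
                [0.1, 0.6, 0.1, 0.2, 0.0],
                [0.1, 0.5, 0.1, 0.2, 0.0],
                [0.1, 0.2, 0.1, 0.7, 0.0],
                [0.1, 0.6, 0.1, 0.2, 0.0],
            ]
        )
        self.targets = torch.tensor([0, 4, 1, 0, 3, 0])

        # Per-class reference values used by the unmasked tests below.
        self.desired_precisions = [1.00, 0.25, 0.00, 1.00, 0.00]
        self.desired_recalls = [1 / 3, 1.00, 0.00, 1.00, 0.00]
        self.desired_fscores = [
            (2 * p * r) / (p + r) if p + r != 0.0 else 0.0
            for p, r in zip(self.desired_precisions, self.desired_recalls)
        ]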
Example #2
    def test_fbeta_multiclass_with_micro_average(self):
        labels = [1, 3]
        fbeta = FBetaMeasure(average="micro", labels=labels)
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        true_positives = [1, 1]
        false_positives = [3, 0]
        false_negatives = [0, 0]
        mean_true_positive = numpy.mean(true_positives)
        mean_false_positive = numpy.mean(false_positives)
        mean_false_negative = numpy.mean(false_negatives)

        micro_precision = mean_true_positive / (mean_true_positive +
                                                mean_false_positive)
        micro_recall = mean_true_positive / (mean_true_positive +
                                             mean_false_negative)
        micro_fscore = (2 * micro_precision *
                        micro_recall) / (micro_precision + micro_recall)
        # check value
        numpy.testing.assert_almost_equal(precisions,
                                          micro_precision,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls, micro_recall, decimal=2)
        numpy.testing.assert_almost_equal(fscores, micro_fscore, decimal=2)
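For these restricted labels the arithmetic works out to: mean TP = 1.0, mean FP = 1.5, mean FN = 0.0, so micro precision = 1.0 / 2.5 = 0.4, micro recall = 1.0 / 1.0 = 1.0, and micro F-score = 2 * 0.4 * 1.0 / 1.4 ≈ 0.57.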
Example #3
    def test_fbeta_multiclass_with_mask(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        mask = torch.tensor([True, True, True, True, True, False], device=device)

        fbeta = FBetaMeasure()
        fbeta(self.predictions, self.targets, mask)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        assert_allclose(fbeta._pred_sum.tolist(), [1, 3, 0, 1, 0])
        assert_allclose(fbeta._true_sum.tolist(), [2, 1, 0, 1, 1])
        assert_allclose(fbeta._true_positive_sum.tolist(), [1, 1, 0, 1, 0])

        desired_precisions = [1.00, 1 / 3, 0.00, 1.00, 0.00]
        desired_recalls = [0.50, 1.00, 0.00, 1.00, 0.00]
        desired_fscores = [
            (2 * p * r) / (p + r) if p + r != 0.0 else 0.0
            for p, r in zip(desired_precisions, desired_recalls)
        ]
        assert_allclose(precisions, desired_precisions)
        assert_allclose(recalls, desired_recalls)
        assert_allclose(fscores, desired_fscores)
    def test_fbeta_multiclass_micro_average_metric(self):
        fbeta = FBetaMeasure(average='micro')
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric['precision']
        recalls = metric['recall']
        fscores = metric['fscore']

        true_positives = [1, 1, 0, 1, 0]
        false_positives = [0, 3, 0, 0, 0]
        false_negatives = [2, 0, 0, 0, 1]
        mean_true_positive = numpy.mean(true_positives)
        mean_false_positive = numpy.mean(false_positives)
        mean_false_negative = numpy.mean(false_negatives)

        micro_precision = mean_true_positive / (mean_true_positive +
                                                mean_false_positive)
        micro_recall = mean_true_positive / (mean_true_positive +
                                             mean_false_negative)
        micro_fscore = (2 * micro_precision *
                        micro_recall) / (micro_precision + micro_recall)
        # check value
        numpy.testing.assert_almost_equal(precisions,
                                          micro_precision,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls, micro_recall, decimal=2)
        numpy.testing.assert_almost_equal(fscores, micro_fscore, decimal=2)
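Here mean TP = mean FP = mean FN = 0.6, so micro precision, recall, and F-score all come out to 0.6 / 1.2 = 0.5; averaging the per-class counts instead of summing them gives the same ratios because the 1/num_classes factor cancels.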
Example #5
    def test_fbeta_multiclass_with_micro_average(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        labels = [1, 3]
        fbeta = FBetaMeasure(average="micro", labels=labels)
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        # We keep the expected values on the CPU because FBetaMeasure returns them on the CPU.
        true_positives = torch.tensor([1, 1], dtype=torch.float32)
        false_positives = torch.tensor([3, 0], dtype=torch.float32)
        false_negatives = torch.tensor([0, 0], dtype=torch.float32)
        mean_true_positive = true_positives.mean()
        mean_false_positive = false_positives.mean()
        mean_false_negative = false_negatives.mean()

        micro_precision = mean_true_positive / (mean_true_positive + mean_false_positive)
        micro_recall = mean_true_positive / (mean_true_positive + mean_false_negative)
        micro_fscore = (2 * micro_precision * micro_recall) / (micro_precision + micro_recall)
        # check value
        assert_allclose(precisions, micro_precision)
        assert_allclose(recalls, micro_recall)
        assert_allclose(fscores, micro_fscore)
Example #6
class VisualEntailmentHead(Head):
    def __init__(self, vocab: Vocabulary, embedding_dim: int, label_namespace: str = "labels"):
        super().__init__(vocab)

        num_labels = vocab.get_vocab_size(label_namespace)
        self.label_namespace = label_namespace
        self.classifier = torch.nn.Linear(embedding_dim, num_labels)

        from allennlp.training.metrics import CategoricalAccuracy
        from allennlp.training.metrics import FBetaMeasure

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    @overrides
    def forward(
        self,  # type: ignore
        encoded_boxes: torch.Tensor,
        encoded_boxes_mask: torch.Tensor,
        encoded_boxes_pooled: torch.Tensor,
        encoded_text: torch.Tensor,
        encoded_text_mask: torch.Tensor,
        encoded_text_pooled: torch.Tensor,
        pooled_boxes_and_text: torch.Tensor,
        labels: Optional[torch.Tensor] = None,
        label_weights: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        logits = self.classifier(pooled_boxes_and_text)
        probs = torch.softmax(logits, dim=-1)

        output = {"logits": logits, "probs": probs}

        assert label_weights is None
        if labels is not None:
            output["loss"] = torch.nn.functional.cross_entropy(logits, labels) / logits.size(0)
            self.accuracy(logits, labels)
            self.fbeta(probs, labels)

        return output

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        result = self.fbeta.get_metric(reset)
        result["acc"] = self.accuracy.get_metric(reset)
        return result

    def make_output_human_readable(
        self, output_dict: Dict[str, torch.Tensor]
    ) -> Dict[str, torch.Tensor]:
        if len(output_dict) <= 0:
            return output_dict
        logits = output_dict["logits"]
        entailment_answer_index = logits.argmax(-1)
        entailment_answer = [
            self.vocab.get_token_from_index(int(i), "labels") for i in entailment_answer_index
        ]
        output_dict["entailment_answer"] = entailment_answer
        return output_dict

    default_predictor = "vilbert_ve"
Example #7
class RationaleBaseModel(Model):
    def __init__(
        self,
        vocab: Vocabulary,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        super(RationaleBaseModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._f1_metric = FBetaMeasure()
        self._accuracy = CategoricalAccuracy()

        self.prediction_mode = False
        
        initializer(self)

    def forward(self, document, sentence_indices, query=None, labels=None, metadata=None):
        raise NotImplementedError

    def decode(self, output_dict):
        output_dict = self._decode(output_dict)
        return output_dict

    def _call_metrics(self, output_dict):
        self._f1_metric(output_dict['logits'], output_dict['gold_labels'])
        self._accuracy(output_dict['logits'], output_dict['gold_labels'])

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self._f1_metric.get_metric(reset)
        output_labels = self._vocabulary.get_index_to_token_vocabulary("labels")
        output_labels = [output_labels[i] for i in range(len(output_labels))]

        class_metrics = {}
        for k, v in metrics.items():
            assert len(v) == len(output_labels)
            class_nums = dict(zip(output_labels, v))
            class_metrics.update({k + "_" + str(kc): x for kc, x in class_nums.items()})

        class_metrics.update({"accuracy": self._accuracy.get_metric(reset)})
        modified_class_metrics = {}

        for k, v in class_metrics.items():
            if k.endswith('_1') or k == 'accuracy':
                modified_class_metrics[k] = v
            else:
                modified_class_metrics['_' + k] = v

        return modified_class_metrics

    def normalize_attentions(self, output_dict):
        '''
        In case attention is over subtokens rather than at the token level,
        combine subtoken attention into token attention.
        '''

        return output_dict
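The normalize_attentions hook above is a no-op; a subclass working with wordpiece-level attention might pool it back to tokens along these lines (a minimal sketch; the pool_subtoken_attentions helper and its arguments are hypothetical, not part of the original model):

import torch

def pool_subtoken_attentions(attentions: torch.Tensor,
                             token_ids: torch.LongTensor,
                             num_tokens: int) -> torch.Tensor:
    # attentions: (batch_size, num_subtokens) attention weights over subtokens
    # token_ids:  (batch_size, num_subtokens) index of the parent token of each subtoken
    # num_tokens: length of the longest token sequence in the batch
    token_attentions = attentions.new_zeros(attentions.size(0), num_tokens)
    # scatter_add_ accumulates each subtoken's attention onto its parent token
    token_attentions.scatter_add_(1, token_ids, attentions)
    return token_attentions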
Example #8
class NLIModel(Model):

    default_predictor = "NLIPredictor"

    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder = None,
                 encoder: Seq2VecEncoder = None,
                 dropout: float = 0.3):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder or BertCLSPooler(self.embedder.get_output_dim())
        self.dropout = nn.Dropout(dropout)
        num_classes = self.vocab.get_vocab_size("labels")
        assert num_classes > 0, "Wrong namespace for labels apparently"
        self.clf = nn.Linear(self.encoder.get_output_dim(), num_classes)
        self.accuracy = CategoricalAccuracy()
        assert num_classes == 2 or num_classes == 3
        labels = list(range(num_classes))
        self.f1 = FBetaMeasure(average=None, labels=labels)

    def forward(self,
                tokens: Dict[str, Dict[str, torch.LongTensor]],
                labels: torch.LongTensor = None,
                **kwargs) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(tokens)
        embedded = self.embedder(tokens)
        embedded = self.dropout(embedded)
        encoded_cls = self.encoder(embedded, mask)
        logits = self.clf(encoded_cls)
        # logits - batch_size, num_classes
        output_dict = {"logits": logits}
        if labels is not None:
            # labels - batch_size
            labels = labels.view(-1)
            loss = cross_entropy(logits, labels)
            output_dict["loss"] = loss
            self.accuracy(logits, labels)
            self.f1(logits, labels)
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = {}
        acc: float = self.accuracy.get_metric(reset)
        metrics["accuracy"] = acc
        f1 = self.f1.get_metric(reset)
        for name, idx in self.vocab.get_token_to_index_vocabulary(
                "labels").items():
            for metric_name, value in f1.items():
                metrics[name + "_" + metric_name] = value[idx]
        return metrics

    def make_output_human_readable(
            self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, Any]:
        return output_dict
Example #9
    def test_fbeta_handles_batch_size_of_one(self, device: str):
        predictions = torch.tensor([[0.2862, 0.3479, 0.1627, 0.2033]], device=device)
        targets = torch.tensor([1], device=device)
        mask = torch.tensor([True], device=device)

        fbeta = FBetaMeasure()
        fbeta(predictions, targets, mask)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]

        assert_allclose(precisions, [0.0, 1.0, 0.0, 0.0])
        assert_allclose(recalls, [0.0, 1.0, 0.0, 0.0])
Example #10
    def test_fbeta_handles_batch_size_of_one(self):
        predictions = torch.Tensor([[0.2862, 0.3479, 0.1627, 0.2033]])
        targets = torch.Tensor([1])
        mask = torch.Tensor([1])

        fbeta = FBetaMeasure()
        fbeta(predictions, targets, mask)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]

        numpy.testing.assert_almost_equal(precisions, [0.0, 1.0, 0.0, 0.0])
        numpy.testing.assert_almost_equal(recalls, [0.0, 1.0, 0.0, 0.0])
Example #11
    def test_fbeta_handles_no_prediction_true_all_class(self, device: str):

        predictions = torch.tensor([[0.65, 0.35], [0.0, 0.0]], device=device)
        # preds = [0, NA]
        targets = torch.tensor([1, 1], device=device)

        fbeta = FBetaMeasure()
        fbeta(predictions, targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        assert_allclose(precisions, [0.0, 0.0])
        assert_allclose(recalls, [0.0, 0.0])
        assert_allclose(fscores, [0.0, 0.0])
    def test_fbeta_multiclass_with_explicit_labels(self):
        # same predictions but with an explicit label ordering
        fbeta = FBetaMeasure(labels=[4, 3, 2, 1, 0])
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric['precision']
        recalls = metric['recall']
        fscores = metric['fscore']

        desired_precisions = self.desired_precisions[::-1]
        desired_recalls = self.desired_recalls[::-1]
        desired_fscores = self.desired_fscores[::-1]
        # check value
        numpy.testing.assert_almost_equal(precisions,
                                          desired_precisions,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls, desired_recalls, decimal=2)
        numpy.testing.assert_almost_equal(fscores, desired_fscores, decimal=2)
Example #13
    def test_fbeta_multiclass_with_explicit_labels(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        # same predictions but with an explicit label ordering
        fbeta = FBetaMeasure(labels=[4, 3, 2, 1, 0])
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        desired_precisions = self.desired_precisions[::-1]
        desired_recalls = self.desired_recalls[::-1]
        desired_fscores = self.desired_fscores[::-1]
        # check value
        assert_allclose(precisions, desired_precisions)
        assert_allclose(recalls, desired_recalls)
        assert_allclose(fscores, desired_fscores)
Example #14
    def test_fbeta_multiclass_with_macro_average(self):
        labels = [0, 1]
        fbeta = FBetaMeasure(average="macro", labels=labels)
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        macro_precision = numpy.array(self.desired_precisions)[labels].mean()
        macro_recall = numpy.array(self.desired_recalls)[labels].mean()
        macro_fscore = numpy.array(self.desired_fscores)[labels].mean()

        # check value
        numpy.testing.assert_almost_equal(precisions,
                                          macro_precision,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls, macro_recall, decimal=2)
        numpy.testing.assert_almost_equal(fscores, macro_fscore, decimal=2)
Example #15
    def test_fbeta_multiclass_metric(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        fbeta = FBetaMeasure()
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        # check value
        assert_allclose(precisions, self.desired_precisions)
        assert_allclose(recalls, self.desired_recalls)
        assert_allclose(fscores, self.desired_fscores)

        # check type
        assert isinstance(precisions, List)
        assert isinstance(recalls, List)
        assert isinstance(fscores, List)
Example #16
    def test_fbeta_multiclass_with_macro_average(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        labels = [0, 1]
        fbeta = FBetaMeasure(average="macro", labels=labels)
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        # We keep the expected values on the CPU because FBetaMeasure returns them on the CPU.
        macro_precision = torch.tensor(self.desired_precisions)[labels].mean()
        macro_recall = torch.tensor(self.desired_recalls)[labels].mean()
        macro_fscore = torch.tensor(self.desired_fscores)[labels].mean()

        # check value
        assert_allclose(precisions, macro_precision)
        assert_allclose(recalls, macro_recall)
        assert_allclose(fscores, macro_fscore)
def multiple_runs(
    global_rank: int,
    world_size: int,
    gpu_id: Union[int, torch.device],
    metric: FBetaMeasure,
    metric_kwargs: Dict[str, List[Any]],
    desired_values: Dict[str, Any],
    exact: Union[bool, Tuple[float, float]] = True,
):

    kwargs = {}
    # Use the arguments meant for the process with rank `global_rank`.
    for argname in metric_kwargs:
        kwargs[argname] = metric_kwargs[argname][global_rank]

    for i in range(200):
        metric(**kwargs)

    metric_values = metric.get_metric()

    for key in desired_values:
        assert_allclose(desired_values[key], metric_values[key])
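As context for multiple_runs, metric_kwargs holds one argument list per process rank; a hypothetical two-rank layout (the tensors and values below are illustrative, not taken from the original test) might look like:

import torch
from allennlp.training.metrics import FBetaMeasure

# Index 0 of each list goes to rank 0, index 1 to rank 1.
metric_kwargs = {
    "predictions": [torch.rand(4, 3), torch.rand(4, 3)],
    "gold_labels": [torch.randint(0, 3, (4,)), torch.randint(0, 3, (4,))],
}

# Each process would then call something equivalent to:
# multiple_runs(global_rank, world_size=2, gpu_id=-1,
#               metric=FBetaMeasure(average="micro"),
#               metric_kwargs=metric_kwargs,
#               desired_values={"precision": ..., "recall": ..., "fscore": ...})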
    def test_fbeta_multiclass_macro_average_metric(self):
        fbeta = FBetaMeasure(average='macro')
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric['precision']
        recalls = metric['recall']
        fscores = metric['fscore']

        macro_precision = numpy.mean(self.desired_precisions)
        macro_recall = numpy.mean(self.desired_recalls)
        macro_fscore = numpy.mean(self.desired_fscores)
        # check value
        numpy.testing.assert_almost_equal(precisions,
                                          macro_precision,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls, macro_recall, decimal=2)
        numpy.testing.assert_almost_equal(fscores, macro_fscore, decimal=2)

        # check type
        assert isinstance(precisions, float)
        assert isinstance(recalls, float)
        assert isinstance(fscores, float)
Example #19
    def test_fbeta_multiclass_with_weighted_average(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        labels = [0, 1]
        fbeta = FBetaMeasure(average="weighted", labels=labels)
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        weighted_precision, weighted_recall, weighted_fscore, _ = precision_recall_fscore_support(
            self.targets.cpu().numpy(),
            self.predictions.argmax(dim=1).cpu().numpy(),
            labels=labels,
            average="weighted",
        )

        # check value
        assert_allclose(precisions, weighted_precision)
        assert_allclose(recalls, weighted_recall)
        assert_allclose(fscores, weighted_fscore)
    def test_fbeta_multiclass_metric(self):
        fbeta = FBetaMeasure()
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric['precision']
        recalls = metric['recall']
        fscores = metric['fscore']

        # check value
        numpy.testing.assert_almost_equal(precisions,
                                          self.desired_precisions,
                                          decimal=2)
        numpy.testing.assert_almost_equal(recalls,
                                          self.desired_recalls,
                                          decimal=2)
        numpy.testing.assert_almost_equal(fscores,
                                          self.desired_fscores,
                                          decimal=2)

        # check type
        assert isinstance(precisions, List)
        assert isinstance(recalls, List)
        assert isinstance(fscores, List)
class VisualEntailmentModel(VisionTextModel):
    """
    Model for visual entailment task based on the paper
    [Visual Entailment: A Novel Task for Fine-Grained Image Understanding]
    (https://api.semanticscholar.org/CorpusID:58981654).

    # Parameters

    vocab : `Vocabulary`
    text_embeddings : `TransformerEmbeddings`
    image_embeddings : `ImageFeatureEmbeddings`
    encoder : `BiModalEncoder`
    pooled_output_dim : `int`
    fusion_method : `str`, optional (default = `"sum"`)
    dropout : `float`, optional (default = `0.1`)
    label_namespace : `str`, optional (default = `labels`)
    """
    def __init__(
        self,
        vocab: Vocabulary,
        text_embeddings: TransformerEmbeddings,
        image_embeddings: ImageFeatureEmbeddings,
        encoder: BiModalEncoder,
        pooled_output_dim: int,
        fusion_method: str = "sum",
        dropout: float = 0.1,
        label_namespace: str = "labels",
        *,
        ignore_text: bool = False,
        ignore_image: bool = False,
    ) -> None:

        super().__init__(
            vocab,
            text_embeddings,
            image_embeddings,
            encoder,
            pooled_output_dim,
            fusion_method,
            dropout,
            label_namespace,
            is_multilabel=False,
        )

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    @overrides
    def forward(
        self,  # type: ignore
        box_features: torch.Tensor,
        box_coordinates: torch.Tensor,
        box_mask: torch.Tensor,
        hypothesis: TextFieldTensors,
        labels: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:

        return super().forward(
            box_features,
            box_coordinates,
            box_mask,
            text=hypothesis,
            labels=labels,
            label_weights=None,
        )

    @overrides
    def _compute_loss_and_metrics(
        self,
        batch_size: int,
        outputs: torch.Tensor,
        label: torch.Tensor,
        label_weights: Optional[torch.Tensor] = None,
    ):
        assert label_weights is None
        if label is not None:
            outputs["loss"] = (
                torch.nn.functional.cross_entropy(outputs["logits"], label) /
                batch_size)
            self.accuracy(outputs["logits"], label)
            self.fbeta(outputs["probs"], label)
        return outputs

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self.fbeta.get_metric(reset)
        accuracy = self.accuracy.get_metric(reset)
        metrics.update({"accuracy": accuracy})
        return metrics

    @overrides
    def make_output_human_readable(
            self, output_dict: Dict[str,
                                    torch.Tensor]) -> Dict[str, torch.Tensor]:
        batch_labels = []
        for batch_index, batch in enumerate(output_dict["probs"]):
            labels = np.argmax(batch, axis=-1)
            batch_labels.append(labels)
        output_dict["labels"] = batch_labels
        return output_dict

    default_predictor = "vilbert_ve"
Example #22
class Nlvr2Model(VisionTextModel):
    """
    Model for the NLVR2 task based on the paper
    [A Corpus for Reasoning About Natural Language Grounded in Photographs]
    (https://api.semanticscholar.org/CorpusID:53178856).

    # Parameters

    vocab : `Vocabulary`
    text_embeddings : `TransformerEmbeddings`
    image_embeddings : `ImageFeatureEmbeddings`
    encoder : `BiModalEncoder`
    pooled_output_dim : `int`
    fusion_method : `str`, optional (default = `"mul"`)
    dropout : `float`, optional (default = `0.1`)
    label_namespace : `str`, optional (default = `labels`)
    """
    def __init__(
        self,
        vocab: Vocabulary,
        text_embeddings: TransformerEmbeddings,
        image_embeddings: ImageFeatureEmbeddings,
        encoder: BiModalEncoder,
        pooled_output_dim: int,
        fusion_method: str = "mul",
        dropout: float = 0.1,
        label_namespace: str = "labels",
        *,
        ignore_text: bool = False,
        ignore_image: bool = False,
    ) -> None:

        super().__init__(
            vocab,
            text_embeddings,
            image_embeddings,
            encoder,
            pooled_output_dim,
            fusion_method,
            dropout,
            label_namespace,
            is_multilabel=False,
        )

        self.pooled_output_dim = pooled_output_dim

        self.layer1 = torch.nn.Linear(pooled_output_dim * 2, pooled_output_dim)
        self.layer2 = torch.nn.Linear(pooled_output_dim, 2)

        self.activation = torch.nn.ReLU()

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    def forward(
        self,  # type: ignore
        box_features: torch.Tensor,
        box_coordinates: torch.Tensor,
        box_mask: torch.Tensor,
        hypothesis: TextFieldTensors,
        label: Optional[torch.Tensor] = None,
        identifier: List[Dict[str, Any]] = None,
    ) -> Dict[str, torch.Tensor]:
        batch_size = box_features.shape[0]

        pooled_outputs = self.backbone(
            box_features, box_coordinates, box_mask,
            hypothesis)["pooled_boxes_and_text"].transpose(0, 1)

        hidden = self.layer1(
            torch.cat((pooled_outputs[0], pooled_outputs[1]), dim=-1))

        # Shape: (batch_size, num_labels)
        logits = self.layer2(self.activation(hidden))

        # Shape: (batch_size, num_labels)
        probs = torch.softmax(logits, dim=-1)

        outputs = {"logits": logits, "probs": probs}
        outputs = self._compute_loss_and_metrics(batch_size, outputs, label)

        return outputs

    def _compute_loss_and_metrics(
        self,
        batch_size: int,
        outputs: Dict[str, torch.Tensor],
        label: torch.Tensor,
        label_weights: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        if label_weights is not None:
            raise NotImplementedError(
                "This implementation does not support label_weights.")
        if label is not None:
            outputs["loss"] = (
                torch.nn.functional.cross_entropy(outputs["logits"], label) /
                batch_size)
            self.accuracy(outputs["logits"], label)
            self.fbeta(outputs["probs"], label)
        return outputs

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self.fbeta.get_metric(reset)
        accuracy = self.accuracy.get_metric(reset)
        metrics.update({"accuracy": accuracy})
        return metrics

    def make_output_human_readable(
            self, output_dict: Dict[str,
                                    torch.Tensor]) -> Dict[str, torch.Tensor]:
        batch_labels = []
        for batch_index, batch in enumerate(output_dict["probs"]):
            labels = np.argmax(batch, axis=-1)
            batch_labels.append(labels)
        output_dict["labels"] = batch_labels
        return output_dict

    default_predictor = "nlvr2"
class Nlvr2Head(Head):
    def __init__(self,
                 vocab: Vocabulary,
                 embedding_dim: int,
                 label_namespace: str = "labels"):
        super().__init__(vocab)

        self.label_namespace = label_namespace

        self.layer1 = torch.nn.Linear(embedding_dim * 2, embedding_dim)
        self.layer2 = torch.nn.Linear(embedding_dim, 2)

        self.activation = torch.nn.ReLU()

        from allennlp.training.metrics import CategoricalAccuracy
        from allennlp.training.metrics import FBetaMeasure

        self.accuracy = CategoricalAccuracy()
        self.fbeta = FBetaMeasure(beta=1.0, average="macro")

    def forward(
        self,  # type: ignore
        encoded_boxes: torch.Tensor,
        encoded_boxes_mask: torch.Tensor,
        encoded_boxes_pooled: torch.Tensor,
        encoded_text: torch.Tensor,
        encoded_text_mask: torch.Tensor,
        encoded_text_pooled: torch.Tensor,
        pooled_boxes_and_text: torch.Tensor,
        label: Optional[torch.Tensor] = None,
        label_weights: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        pooled_boxes_and_text = pooled_boxes_and_text.transpose(0, 1)
        hidden = self.layer1(
            torch.cat((pooled_boxes_and_text[0], pooled_boxes_and_text[1]),
                      dim=-1))
        logits = self.layer2(self.activation(hidden))
        probs = torch.softmax(logits, dim=-1)

        output = {"logits": logits, "probs": probs}

        assert label_weights is None
        if label is not None:
            output["loss"] = torch.nn.functional.cross_entropy(
                logits, label) / logits.size(0)
            self.accuracy(logits, label)
            self.fbeta(probs, label)

        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        result = self.fbeta.get_metric(reset)
        result["accuracy"] = self.accuracy.get_metric(reset)
        return result

    def make_output_human_readable(
            self, output_dict: Dict[str,
                                    torch.Tensor]) -> Dict[str, torch.Tensor]:
        if len(output_dict) <= 0:
            return output_dict
        logits = output_dict["logits"]
        entailment_answer_index = logits.argmax(-1)
        entailment_answer = [
            self.vocab.get_token_from_index(int(i), "labels")
            for i in entailment_answer_index
        ]
        output_dict["entailment_answer"] = entailment_answer
        return output_dict

    default_predictor = "nlvr2"
Example #24
class PrePruner(Model):
    def __init__(self,
                 vocab,
                 feature_size: int,
                 max_span_width: int,
                 keep_rate: int,
                 mlp_dropout: float = 0.4,
                 embedder_type=None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PrePruner, self).__init__(vocab, regularizer)
        self.keep_rate = keep_rate
        self.embedder = get_embeddings(embedder_type, self.vocab)
        self.ffn = FeedForward(300, 2, 300, F.relu, 0.5)
        embedding_dim = self.embedder.get_output_dim()

        self._span_extractor = PoolingSpanExtractor(
            embedding_dim,
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)
        entity_feedforward = FeedForward(self._span_extractor.get_output_dim(),
                                         2, 150, F.relu, mlp_dropout)

        self.feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(entity_feedforward),
            TimeDistributed(
                torch.nn.Linear(entity_feedforward.get_output_dim(), 1)),
        )
        self._lexical_dropout = torch.nn.Dropout(p=0.1)

        self.loss = torch.nn.BCELoss()
        self._metric_f1 = FBetaMeasure()

    def forward(self,
                text: Dict[str, torch.LongTensor],
                spans: torch.IntTensor,
                labels: torch.IntTensor = None,
                **kwargs):
        text_embeddings = self._lexical_dropout(self.embedder(text))

        # Shape: (batch_size, num_spans)
        span_mask = (spans[:, :, 0] >= 0).squeeze(-1).float()
        spans = F.relu(spans.float()).long()

        span_embeddings = self._span_extractor(text_embeddings,
                                               spans,
                                               span_indices_mask=span_mask)

        span_scores = self.feedforward_scorer(span_embeddings)

        span_scores = span_scores.squeeze(-1)
        span_scores += span_mask.log()
        span_scores = span_scores.sigmoid()
        topk_idx = torch.topk(span_scores,
                              int(self.keep_rate * spans.shape[1]))[-1]
        predict_true = span_scores.new_zeros(span_scores.shape).scatter_(
            1, topk_idx, 1).bool()
        is_entity = (labels != 0).float()
        span_scores = span_scores.reshape(-1)
        is_entity = is_entity.reshape(-1)
        loss = self.loss(span_scores, is_entity)

        predict_true_flatten = predict_true.reshape(-1)
        predict_true_flatten = predict_true_flatten.unsqueeze(-1)
        predict_false_flatten = ~predict_true_flatten
        predict = torch.cat([predict_false_flatten, predict_true_flatten], -1)
        self._metric_f1(predict, is_entity, mask=span_mask.reshape(-1))

        predict_true |= labels.bool()
        output_dict = {"loss": loss, "predict_true": predict_true}
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False):
        metric = self._metric_f1.get_metric(reset)
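        # FBetaMeasure returns per-class lists; keep only the positive class (index 1)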
        metric['precision'] = metric['precision'][1]
        metric['recall'] = metric['recall'][1]
        metric['fscore'] = metric['fscore'][1]

        return metric
Example #25
class Seq2SeqKnu(SimpleSeq2Seq):
    def __init__(self, vocab: Vocabulary, source_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, target_namespace, decoder, attention,
                 max_decoding_steps: int, cuda_device: int):
        super().__init__(vocab,
                         source_embedder,
                         encoder,
                         max_decoding_steps,
                         use_bleu=False)

        self._decoder = decoder

        self._attention = attention

        self.acc = CategoricalAccuracy()
        self.label_acc = BooleanAccuracy()
        self.f1 = FBetaMeasure(average="macro")

        self.cuda_device = cuda_device

        self._target_namespace = target_namespace

        num_classes = self.vocab.get_vocab_size(self._target_namespace)

        # hidden is the concatenation of decoder_hidden, attended_output and encoder_slice, hence the * 3
        self._output_projection_layer = Linear(self._decoder_output_dim * 3,
                                               num_classes)

    def forward(
        self,
        source_tokens: Dict[str, torch.LongTensor],
        gold_mentions,
        target_tokens: Dict[str, torch.LongTensor] = None
    ) -> Dict[str, torch.Tensor]:
        """

        :param source_tokens: sentence序列化后
        :param gold_mentions: 表示第几个位置是mention
        :param target_tokens: mention对应的target
        :return:
        """
        # (batch_size, max_sentence_length, embedding_dim)
        state = self._encode(source_tokens)

        output_dict = self._forward_loop(state, gold_mentions, target_tokens)

        if not self.training:
            if target_tokens:
                logits = output_dict['logits']
                mention_mask = output_dict['mention_mask']
                target = target_tokens['tokens']
                predictions = output_dict['predictions']
                class_probs = output_dict['class_probs']

                self.label_acc(predictions, target, mention_mask)
                self.acc(logits, target, mention_mask)
                self.f1(class_probs, target, mention_mask)

        return output_dict

    def _forward_loop(
        self,
        state: Dict[str, torch.Tensor],
        gold_mentions: torch.LongTensor,
        target_tokens: Dict[str, torch.LongTensor] = None
    ) -> Dict[str, torch.Tensor]:
        # shape: (batch_size, max_input_sequence_length)
        source_mask = state["source_mask"]

        # shape: (batch_size, max_input_sequence_length, embedding_dim)
        encoder_outputs = state['encoder_outputs']

        batch_size = source_mask.size()[0]

        max_input_sequence_length = source_mask.size()[1]

        # The next two steps zero-pad gold_mentions to (batch_size, max_input_sequence_length)
        gold_mentions_expanded = torch.zeros(
            batch_size, max_input_sequence_length).cuda(self.cuda_device)
        gold_mentions_expanded[:, :gold_mentions.size()[1]] = gold_mentions

        # Use get_text_field_mask to build a 0-1 mask indicating whether each position is valid
        # shape: (batch_size, max_input_sequence_length)
        mention_mask = util.get_text_field_mask(
            {'gold_mentions': gold_mentions_expanded})

        for b in range(batch_size):
            encoder_output = encoder_outputs[b]
            gold_mention = gold_mentions_expanded[b]
            # Select the outputs at the mention positions; the remainder are filled with the output at position 0.
            # E.g. if gold_mention = [3, 5, 0, 0], take the outputs at positions 3 and 5 and pad the rest of the matrix with the output at position 0.
            encoder_selected = torch.index_select(encoder_output, 0,
                                                  gold_mention.long())

            if b == 0:
                encoder_resorted = encoder_selected.unsqueeze(0)
            else:
                encoder_resorted = torch.cat(
                    (encoder_resorted, encoder_selected.unsqueeze(0)), 0)

        # Run the decoder to produce outputs
        # shape: (batch_size, max_sentence_length, num_classes)
        decoder_outputs = self._decode(encoder_resorted, mention_mask)

        # Compute token by token
        token_logits = []
        token_predictions = []
        token_class_probs = []
        for i in range(max_input_sequence_length):
            encoder_slice = encoder_resorted[:, i, :]

            decoder_hidden = decoder_outputs[:, i, :]

            # source_mask_slice = source_mask[:, i].float()

            # TODO: the decoder hidden state needs to be concatenated with h_encoder_t
            encoder_weights = self._attention(decoder_hidden, encoder_outputs,
                                              source_mask.float())

            # Weighted sum
            # shape: (batch_size, hidden_dim)
            attended_output = util.weighted_sum(encoder_outputs,
                                                encoder_weights)

            # shape: (batch_size, hidden_dim * 3)
            hidden_attention_cat = torch.cat(
                (decoder_hidden, attended_output, encoder_slice), -1)

            # shape: (batch_size, num_classes)
            score = self._output_projection_layer(hidden_attention_cat)

            token_logits.append(score.unsqueeze(1))

            class_probabilities = F.softmax(score, dim=-1)

            token_class_probs.append(class_probabilities.unsqueeze(1))

            # shape (predicted_classes): (batch_size,)
            _, predicted_classes = torch.max(class_probabilities, 1)

            last_predictions = predicted_classes

            token_predictions.append(last_predictions.unsqueeze(1))

        predictions = torch.cat(token_predictions, 1)
        class_probs = torch.cat(token_class_probs, 1)
        # Truncate everything beyond the target length
        output_dict = {
            'predictions': predictions,
            'class_probs': class_probs.detach()
        }

        if target_tokens:

            targets = target_tokens['tokens']
            target_length = targets.size()[1]

            # The steps below mainly do truncation: the output shape is (batch_size, max_sentence_length, num_classes),
            # while the target is (batch_size, max_target_length), and max_sentence_length != max_target_length.
            predictions_slice = predictions[:, :target_length]
            class_probs_slice = class_probs[:, :target_length, :]
            output_dict['predictions'] = predictions_slice
            output_dict['class_probs'] = class_probs_slice

            target_length = targets.size()[1]
            logits = torch.cat(token_logits, 1)
            # Truncate everything beyond the target length
            logits_slice = logits[:, :target_length, :].contiguous()
            targets = targets.contiguous()
            mention_mask = mention_mask[:, :target_length].contiguous()
            loss = util.sequence_cross_entropy_with_logits(
                logits_slice.float(), targets, mention_mask.float())
            output_dict['loss'] = loss
            output_dict['logits'] = logits_slice
            output_dict['mention_mask'] = mention_mask

        return output_dict

    def _decode(self, encoder_output, decode_mask):
        decoder_outputs = self._decoder(encoder_output, decode_mask)
        return decoder_outputs

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        all_metrics: Dict[str, float] = {}
        if not self.training:
            all_metrics.update({'accuracy': self.acc.get_metric(reset=reset)})
            all_metrics.update(
                {'label_accuracy': self.label_acc.get_metric(reset=reset)})
            all_metrics.update(
                {'f1': self.f1.get_metric(reset=reset)['fscore']})

        return all_metrics
class MaxMarginConditionalClassificationModel(MaxMarginConditionalModel):
    def __init__(
            self,
            num_entities: int,
            num_relations: int,
            embedding_dim: int,
            box_type: str = 'SigmoidBoxTensor',
            single_box: bool = False,
            softbox_temp: float = 10.,
            margin: float = 0.0,
            number_of_negative_samples: int = 0,
            debug: bool = False,
            regularization_weight: float = 0,
            init_interval_center: float = 0.25,
            init_interval_delta: float = 0.1,
            # adversarial_negative: bool = False,
            # adv_neg_softmax_temp: float = 0.8
    ) -> None:
        super().__init__(
            num_entities, num_relations, embedding_dim, box_type, single_box,
            softbox_temp, margin, number_of_negative_samples, debug,
            regularization_weight, init_interval_center, init_interval_delta)
        self.train_f1 = FBetaMeasure(average='micro')
        #self.valid_f1 = FBetaMeasure(average='micro')
        self.threshold_with_f1 = F1WithThreshold(flip_sign=True)
        self.istest = False
        self.test_threshold = None
        self.test_f1 = F1Measure(positive_label=1)

    def is_test(self) -> bool:
        if (not self.is_eval()) and self.istest:
            raise RuntimeError("test flag is true but eval is false")

        return self.is_eval() and self.istest

    def test(self) -> None:
        if not self.is_eval():
            raise RuntimeError("test flag is true but eval is false")
        self.istest = True

    def get_ranks(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.is_test():
            return self.get_test(embeddings)

        s = self._get_triple_score(embeddings['h'], embeddings['t'],
                                   embeddings['r'])
        # preds = torch.stack((p_s, n_s), dim=1)  # shape = (batch, 2)
        #self.valid_f1(preds, labels)
        labels = embeddings['label']
        # update the metrics
        self.threshold_with_f1(s, labels)

        return {}

    def get_test(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.test_threshold is None:
            raise RuntimeError("test_threshold should be set")
        s = self._get_triple_score(embeddings['h'], embeddings['t'],
                                   embeddings['r'])
        labels = embeddings['label']
        pos_prediction = (s > self.test_threshold).float()
        neg_prediction = 1.0 - pos_prediction
        predictions = torch.stack((neg_prediction, pos_prediction), -1)
        self.test_f1(predictions, labels)

        return {}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        if self.is_eval():
            if not self.istest:
                metrics = self.threshold_with_f1.get_metric(reset)
            else:
                p, r, f = self.test_f1.get_metric(reset)
                metrics = {'precision': p, 'recall': r, 'fscore': f}
        else:
            metrics = self.train_f1.get_metric(reset)
            metrics[
                'regularization_loss'] = self.regularization_loss.get_metric(
                    reset)

        return metrics

    def get_box_embeddings_val(self, h: torch.Tensor, t: torch.Tensor,
                               r: torch.Tensor,
                               label: torch.Tensor) -> Dict[str, BoxTensor]:

        return BaseBoxModel.get_box_embeddings_val(
            self, h=h, t=t, r=r, label=label)

    def get_loss(self, scores: Tuple[torch.Tensor, torch.Tensor],
                 label: torch.Tensor) -> torch.Tensor:
        # max margin loss expects label to be float
        label = label.to(scores[0].dtype)
        loss = self.loss_f(*scores, label) + self.get_regularization_penalty()
        # metrics require 0,1 labels

        if not self.is_eval():
            with torch.no_grad():
                labels = torch.zeros_like(scores[0]).reshape(
                    -1)  # shape = (batch)
                preds = torch.stack(scores, dim=1)
                self.train_f1(preds, labels)

        return loss
class BCEBoxClassificationModel(BCEBoxModel):
    def __init__(self,
                 num_entities: int,
                 num_relations: int,
                 embedding_dim: int,
                 box_type: str = 'SigmoidBoxTensor',
                 single_box: bool = False,
                 softbox_temp: float = 10.,
                 number_of_negative_samples: int = 0,
                 debug: bool = False,
                 regularization_weight: float = 0,
                 init_interval_center: float = 0.25,
                 init_interval_delta: float = 0.1,
                 neg_samples_in_dataset_reader: int = 0) -> None:
        super().__init__(
            num_entities,
            num_relations,
            embedding_dim,
            box_type=box_type,
            single_box=single_box,
            softbox_temp=softbox_temp,
            number_of_negative_samples=number_of_negative_samples,
            debug=debug,
            regularization_weight=regularization_weight,
            init_interval_center=init_interval_center,
            init_interval_delta=init_interval_delta,
            neg_samples_in_dataset_reader=neg_samples_in_dataset_reader)
        self.train_f1 = FBetaMeasure(average='micro')
        # self.valid_f1 = FBetaMeasure(average='micro')
        self.threshold_with_f1 = F1WithThreshold(flip_sign=True)

        self.istest = False
        self.test_threshold = None
        # self.test_f1 = FBetaMeasure(average='macro')
        self.test_f1 = F1Measure(positive_label=1)

    def is_test(self) -> bool:
        if (not self.is_eval()) and self.istest:
            raise RuntimeError("test flag is true but eval is false")

        return self.is_eval() and self.istest

    def test(self) -> None:
        if not self.is_eval():
            raise RuntimeError("test flag is true but eval is false")
        self.istest = True

    def get_box_embeddings_val(self, h: torch.Tensor, t: torch.Tensor,
                               r: torch.Tensor,
                               label: torch.Tensor) -> Dict[str, BoxTensor]:

        return BaseBoxModel.get_box_embeddings_val(self,
                                                   h=h,
                                                   t=t,
                                                   r=r,
                                                   label=label)

    def get_loss(self, scores: torch.Tensor,
                 label: torch.Tensor) -> torch.Tensor:
        log_p = scores
        log1mp = log1mexp(log_p)
        logits = torch.stack([log1mp, log_p], dim=-1)

        loss = self.loss_f(logits, label) + self.get_regularization_penalty()
        if torch.isnan(loss).any():
            breakpoint()
        if not self.is_eval():
            with torch.no_grad():
                self.train_f1(logits, label)

        return loss

    def get_ranks(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.is_test():
            return self.get_test(embeddings)

        s = self._get_triple_score(embeddings['h'], embeddings['t'],
                                   embeddings['r'])
        # preds = torch.stack((p_s, n_s), dim=1)  # shape = (batch, 2)
        # self.valid_f1(preds, labels)
        labels = embeddings['label']
        # update the metrics
        self.threshold_with_f1(s, labels)

        return {}

    def get_test(self, embeddings: Dict[str, BoxTensor]) -> Any:
        if self.test_threshold is None:
            raise RuntimeError("test_threshold should be set")
        s = self._get_triple_score(embeddings['h'], embeddings['t'],
                                   embeddings['r'])
        labels = embeddings['label']
        pos_prediction = (s > self.test_threshold).float()
        neg_prediction = 1.0 - pos_prediction
        predictions = torch.stack((neg_prediction, pos_prediction), -1)
        self.test_f1(predictions, labels)

        return {}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        if self.is_eval():
            if not self.istest:
                metrics = self.threshold_with_f1.get_metric(reset)
            else:
                p, r, f = self.test_f1.get_metric(reset)
                metrics = {'precision': p, 'recall': r, 'fscore': f}

        else:
            metrics = self.train_f1.get_metric(reset)
            metrics[
                'regularization_loss'] = self.regularization_loss.get_metric(
                    reset)

        return metrics

    def get_regularization_penalty(self) -> Union[float, torch.Tensor]:

        if self.is_eval():
            return 0.0

        if self.regularization_weight > 0:
            all_ = self.h.all_boxes
            deltas = all_.Z - all_.z
            with torch.no_grad():
                assert (deltas >= 0.0).all()
            penalty = self.regularization_weight * torch.sum(deltas)
            # track the reg loss
            self.regularization_loss(penalty.item())

            return penalty
        else:
            return 0.0
class RationaleBaseModel(Model):
    def __init__(
        self,
        vocab: Vocabulary,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        super(RationaleBaseModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._f1_metric = FBetaMeasure()
        self._accuracy = CategoricalAccuracy()

        self.prediction_mode = False

        initializer(self)

    def forward(self,
                document,
                query=None,
                labels=None,
                metadata=None,
                **kwargs):
        # pylint: disable=arguments-differ

        raise NotImplementedError

    def decode(self, output_dict):
        output_dict = self._decode(output_dict)
        output_labels = self._vocabulary.get_index_to_token_vocabulary(
            "labels")

        predicted_labels, gold_labels = [], []
        for p, g in zip(output_dict["predicted_label"], output_dict["label"]):
            predicted_labels.append(output_labels[int(p)])
            gold_labels.append(output_labels[int(g)])

        output_dict["predicted_label"] = predicted_labels
        output_dict["label"] = gold_labels
        output_dict["annotation_id"] = [
            d['annotation_id'] for d in output_dict['metadata']
        ]

        del output_dict['metadata']

        return output_dict

    def _call_metrics(self, output_dict):
        self._f1_metric(output_dict["probs"], output_dict["gold_labels"])
        self._accuracy(output_dict["probs"], output_dict["gold_labels"])

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self._f1_metric.get_metric(reset)
        macro_avg = {'macro_' + k: sum(v) / len(v) for k, v in metrics.items()}
        output_labels = self._vocabulary.get_index_to_token_vocabulary(
            "labels")
        output_labels = [output_labels[i] for i in range(len(output_labels))]

        class_metrics = {}
        for k, v in metrics.items():
            assert len(v) == len(output_labels)
            class_nums = dict(zip(output_labels, v))
            class_metrics.update(
                {k + "_" + str(kc): x
                 for kc, x in class_nums.items()})

        class_metrics.update({"accuracy": self._accuracy.get_metric(reset)})
        class_metrics.update(macro_avg)
        modified_class_metrics = {}

        for k, v in class_metrics.items():
            if k in ["accuracy", "macro_fscore"]:
                modified_class_metrics[k] = v
            else:
                modified_class_metrics["_" + k] = v

        modified_class_metrics["validation_metric"] = class_metrics[
            "macro_fscore"]

        return modified_class_metrics

    def normalize_attentions(self, output_dict):
        """
        In case attention is over subtokens rather than at the token level,
        combine subtoken attention into token attention.
        """

        return output_dict

    def combine_document_query(self, document, query):
        reader = document[0]["reader_object"]
        device = next(self.parameters()).device
        return {
            k: ({x: y.to(device)
                 for x, y in v.items()} if type(v) == dict else v.to(device))
            for k, v in reader.combine_document_query(
                document, query, self._vocabulary).items()
        }
Example #29
class SimpleTagger(Model):
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 dropout: float = 0.1,
                 ff_dim: int = 100):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder

        assert self.embedder.get_output_dim() == self.encoder.get_input_dim()

        self.feedforward = FeedForward(
            encoder.get_output_dim(),
            1,
            hidden_dims=ff_dim,
            activations=Activation.by_name('relu')(),
            dropout=dropout)
        self.out = torch.nn.Linear(
            in_features=self.feedforward.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))
        self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))

        self.f1 = FBetaMeasure(average='micro')
        self.accuracy = CategoricalAccuracy()
        self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')

    def forward(self, tokens: Dict[str, torch.Tensor],
                tags: torch.Tensor) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(tokens)
        embeddings = self.embedder(tokens)
        encoder_out = self.encoder(embeddings, mask)
        encoder_out = self.feedforward(encoder_out)
        logits = self.out(encoder_out)
        output = {"logits": logits, "mask": mask}
        if tags is not None:
            self.accuracy(logits, tags, mask)
            self.f1(logits, tags, mask)
            output['loss'] = -self.crf(logits, tags, mask)
        else:
            output["logits"] = self.crf.viterbi_tags(logits, mask)
        return output

    def decode(self, output_dict: Dict[str, torch.Tensor]):

        logits = output_dict["logits"]
        mask = output_dict["mask"]
        tag_logits = torch.argmax(logits, dim=2).tolist()
        lengths = torch.sum(mask, dim=1).tolist()
        all_labels = []
        for sample_num in range(len(tag_logits)):
            labels = []
            for label_idx in range(lengths[sample_num]):
                labels.append(
                    self.idx_to_label[tag_logits[sample_num][label_idx]])
            all_labels.append(labels)
        return {"labels": all_labels}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        out = {"accuracy": self.accuracy.get_metric(reset)}
        out.update(self.f1.get_metric(reset))
        return out
Example #30
class ClassificationModel(Model, ABC, metaclass=ABCMeta):
    def __init__(
        self,
        vocab: Vocabulary,
        classification_type: str = 'multi-class',
        pos_label: str = None,
        threshold: float = 0.5,
        neg_weight: float = 1.0,
        label_namespace: str = 'labels',
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        super().__init__(vocab, regularizer)

        self._classification_type = classification_type
        self._label_namespace = label_namespace
        self._threshold = threshold
        self._neg_weight = neg_weight

        self._pos_label_index = vocab.get_token_index(
            pos_label, namespace=label_namespace) if pos_label else None

        self._use_threshold = False

        if self._classification_type == "ce":
            self._loss = torch.nn.CrossEntropyLoss()
            self._accuracy = CategoricalAccuracy()
            if self._pos_label_index is not None:
                self._f1 = FBetaMeasure(average=None)
            else:
                self._f1 = FBetaMeasure(average='micro')

        elif self._classification_type == "bce":
            # Whether BCE can be given all-negative samples
            assert self._pos_label_index is None
            self._loss = torch.nn.BCEWithLogitsLoss()
            self._accuracy = BooleanAccuracy()
            self._f1 = BooleanF1()
            self._use_threshold = True

        elif self._classification_type == "as":
            # AS requires _pos_label_index to be given
            assert self._pos_label_index is not None
            self._loss = AdaptiveScalingLossLayer(
                num_label=vocab.get_vocab_size(label_namespace),
                positive_idx=[self._pos_label_index])
            self._accuracy = CategoricalAccuracy()
            self._f1 = FBetaMeasure(average=None)

        else:
            raise NotImplementedError(
                'Classification Type Not Implemented: %s' %
                self._classification_type)

    def get_output_dict(
        self,
        logits,
        label=None,
        metadata=None,
    ):

        if self._use_threshold:
            probs = torch.sigmoid(logits)
        else:
            probs = torch.nn.functional.softmax(logits, dim=-1)

        output_dict = {
            "logits": logits,
            "probs": probs,
        }

        if metadata:
            output_dict["metadata"] = metadata

        if label is not None:
            if self._use_threshold:
                loss = self._loss(logits, label.float())
                self._accuracy(logits > 0.5, label.bool())
                self._f1(logits > 0.5, label.bool())
                output_dict['loss'] = loss
            else:
                loss = self._loss(logits, label)
                # _, pred = torch.max(logits, -1)
                self._accuracy(logits, label)
                self._f1(logits, label)
                output_dict['loss'] = loss

        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = self._f1.get_metric(reset)

        if self._pos_label_index is not None:
            # 0 is None
            metrics = {
                key: value[self._pos_label_index]
                for key, value in metrics.items()
            }

        accuracy = self._accuracy.get_metric(reset)
        metrics.update({'accuracy': accuracy})
        metrics['precision'] = metrics['precision'] * 100
        metrics['recall'] = metrics['recall'] * 100
        metrics['fscore'] = metrics['fscore'] * 100
        metrics['accuracy'] = metrics['accuracy'] * 100
        return metrics

    def decode(
            self, output_dict: Dict[str,
                                    torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Decode probs to label
            classification_type is ``multi-class``
                Does a simple argmax over the probabilities,
            classification_type is ``multi-label``
                Does a simple threshold filter over the probabilities,
        converts index to string label, and add ``"label"`` key to the dictionary with the result.
        """

        predictions = output_dict["probs"].cpu()
        if self._use_threshold:
            if predictions.dim() == 2:
                predictions_list = [
                    predictions[i] for i in range(predictions.shape[0])
                ]
            else:
                predictions_list = [predictions]
            classes = []
            for prediction in predictions_list:
                label_str = list()
                for label_idx, predict in enumerate(
                        prediction > self._threshold):
                    if not predict:
                        continue
                    label_str += [
                        self.vocab.get_token_from_index(
                            label_idx, namespace=self._label_namespace)
                    ]
                classes.append(label_str)
            output_dict["label"] = classes
        else:
            if predictions.dim() == 2:
                predictions_list = [
                    predictions[i] for i in range(predictions.shape[0])
                ]
            else:
                predictions_list = [predictions]
            classes = []
            for prediction in predictions_list:
                label_idx = prediction.argmax(dim=-1).item()
                label_str = self.vocab.get_token_from_index(
                    label_idx, namespace=self._label_namespace)
                classes.append(label_str)
            output_dict["label"] = classes
        return output_dict