Example 1
    def test_merge_two_full_dictionaries(self):
        a = {"a": 1, "b": 2}
        b = {"a": 2, "b": 4, "c": 0}
        expected = {"a": 3, "b": 6, "c": 0}
        a_b = merge_dictionaries_with_sum(a, b)
        assert a_b == expected
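The function being exercised here is not shown in these listings. A minimal sketch of what would satisfy the tests in Examples 1 and 3 (summing values for shared keys, keeping keys present in only one dictionary, and preserving explicit zeros) might look like the following; this is an assumption for illustration, not the library's actual implementation.

    from typing import Dict


    def merge_dictionaries_with_sum(a: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
        # Hypothetical sketch: sum values key-wise across two dictionaries.
        # Keys present in only one dictionary keep their value; zeros are
        # summed like any other number, so they are preserved in the result.
        merged = dict(a)  # copy so neither input is mutated
        for key, value in b.items():
            merged[key] = merged.get(key, 0) + value
        return merged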
Example 2
        exp_name = f"{exp_name}_{fold_num}"

        engine = setup_engine_once(
            experiment_name=exp_name,
            config_dict=copy.deepcopy(config),
            train_data_filepath=train_conll_filepath,
            test_data_filepath=test_conll_filepath,
        )
        # each fold gets its own experiment path; run training and evaluation for this fold
        engine.run()

        fold_tp_counter = engine.test_metric_calc.tp_counter
        fold_fp_counter = engine.test_metric_calc.fp_counter
        fold_fn_counter = engine.test_metric_calc.fn_counter

        tp_counter = merge_dictionaries_with_sum(tp_counter, fold_tp_counter)
        fp_counter = merge_dictionaries_with_sum(fp_counter, fold_fp_counter)
        fn_counter = merge_dictionaries_with_sum(fn_counter, fold_fn_counter)

    parscit_classname2idx = ParscitDataset.get_classname2idx()
    idx2_classname = {
        idx: classname
        for classname, idx in parscit_classname2idx.items()
    }
    ignore_indices = [
        parscit_classname2idx["starting"],
        parscit_classname2idx["ending"],
        parscit_classname2idx["padding"],
    ]

    classification_metrics_utils = ClassificationMetricsUtils(
Example 3
    def test_merge_dictionaries_one_zero(self):
        a = {"a": 0}
        b = {"a": 0, "b": 1}
        a_b = merge_dictionaries_with_sum(a, b)
        expected = {"a": 0, "b": 1}
        assert a_b == expected
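A quick interactive check of the behaviour the two tests above assert, run against the sketch given after Example 1 (hypothetical, since the real implementation is not part of these listings):

    >>> merge_dictionaries_with_sum({"a": 1, "b": 2}, {"a": 2, "b": 4, "c": 0})
    {'a': 3, 'b': 6, 'c': 0}
    >>> merge_dictionaries_with_sum({"a": 0}, {"a": 0, "b": 1})
    {'a': 0, 'b': 1}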
Example 4
    def calc_metric(
        self,
        lines: List[Line],
        labels: List[SeqLabel],
        model_forward_dict: Dict[str, Any],
    ) -> None:
        """

        Parameters
        ----------------
        lines: List[Line]
            The list of lines

        labels: List[Label]
            The list of sequence labels

        model_forward_dict: Dict[str, Any]
            The model_forward_dict should have predicted tags for every namespace
            The predicted_tags are the best possible predicted tags for the batch
            They are List[List[int]] where the size is ``[batch_size, time_steps]``
            We expect that the predicted tags are

        """

        # get true labels for all namespaces
        namespace_to_true_labels = defaultdict(list)
        namespace_to_true_labels_mask = defaultdict(list)
        namespace_to_pred_labels_mask = defaultdict(list)

        for namespace in self.label_namespaces:
            # List[List[int]]
            predicted_tags = model_forward_dict.get(
                f"{self.predicted_tags_namespace_prefix}_{namespace}")
            max_length = max([len(tags)
                              for tags in predicted_tags])  # max num tokens
            numericalizer = self.datasets_manager.namespace_to_numericalizer[
                namespace]
            pred_tags_mask = numericalizer.get_mask_for_batch_instances(
                instances=predicted_tags).tolist()
            namespace_to_pred_labels_mask[namespace] = pred_tags_mask

            for label in labels:
                true_labels = label.tokens[namespace]
                true_labels = [tok.text for tok in true_labels]

                true_labels = numericalizer.numericalize_instance(
                    instance=true_labels)
                true_labels = numericalizer.pad_instance(
                    numericalized_text=true_labels,
                    max_length=max_length,
                    add_start_end_token=False,
                )
                labels_mask = numericalizer.get_mask_for_instance(
                    instance=true_labels).tolist()
                namespace_to_true_labels[namespace].append(true_labels)
                namespace_to_true_labels_mask[namespace].append(labels_mask)

        for namespace in self.label_namespaces:
            labels_ = namespace_to_true_labels[namespace]
            labels_mask_ = namespace_to_true_labels_mask[namespace]
            pred_labels_mask_ = namespace_to_pred_labels_mask[namespace]
            # List[List[int]]
            predicted_tags = model_forward_dict.get(
                f"{self.predicted_tags_namespace_prefix}_{namespace}")

            (
                confusion_mtrx,
                classes,
            ) = self.classification_metrics_utils.get_confusion_matrix_and_labels(
                true_tag_indices=labels_,
                predicted_tag_indices=predicted_tags,
                true_masked_label_indices=labels_mask_,
                pred_labels_mask=pred_labels_mask_,
            )

            tps = np.around(np.diag(confusion_mtrx), decimals=4)
            fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)
            fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

            tps = tps.tolist()
            fps = fps.tolist()
            fns = fns.tolist()

            class_tps_mapping = dict(zip(classes, tps))
            class_fps_mapping = dict(zip(classes, fps))
            class_fns_mapping = dict(zip(classes, fns))

            self.tp_counter[namespace] = merge_dictionaries_with_sum(
                self.tp_counter.get(namespace, {}), class_tps_mapping)
            self.fp_counter[namespace] = merge_dictionaries_with_sum(
                self.fp_counter.get(namespace, {}), class_fps_mapping)
            self.fn_counter[namespace] = merge_dictionaries_with_sum(
                self.fn_counter.get(namespace, {}), class_fns_mapping)
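Every ``calc_metric`` variant in these examples derives its per-class counts the same way: true positives are the diagonal of the confusion matrix, false positives are the column sums minus the diagonal, and false negatives are the row sums minus the diagonal (the StackOverflow answer cited in Examples 5 and 6 describes this decomposition). A self-contained sketch of that step, using scikit-learn's ``confusion_matrix`` and made-up tag indices in place of the library's ``get_confusion_matrix_and_labels``:

    import numpy as np
    from sklearn.metrics import confusion_matrix

    # made-up gold and predicted class indices for a 3-class problem
    true_tags = [0, 0, 1, 1, 2, 2]
    pred_tags = [0, 1, 1, 1, 2, 0]
    classes = [0, 1, 2]

    confusion_mtrx = confusion_matrix(true_tags, pred_tags, labels=classes)

    tps = np.diag(confusion_mtrx)               # correctly predicted instances per class
    fps = np.sum(confusion_mtrx, axis=0) - tps  # predicted as the class but actually another class
    fns = np.sum(confusion_mtrx, axis=1) - tps  # instances of the class that the model missed

    class_tps_mapping = dict(zip(classes, tps.tolist()))
    class_fps_mapping = dict(zip(classes, fps.tolist()))
    class_fns_mapping = dict(zip(classes, fns.tolist()))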
Example 5
    def calc_metric(
        self, iter_dict: Dict[str, Any], model_forward_dict: Dict[str, Any]
    ) -> None:
        """ Updates the values being tracked for calculating the metric

        For Precision Recall FMeasure we update the true positive,
        false positive and false negative of the different classes
        being tracked

        Parameters
        ----------
        iter_dict : Dict[str, Any]
            The ``iter_dict`` from the dataset is expected to have
            ``label``, the labels for the instances, usually
            of size ``[batch_size]``.
            Optionally there can be a ``label_mask`` of size ``[batch_size]``.
            The ``label_mask`` is 1 where the label should be masked and
            0 where it is not masked.

        model_forward_dict : Dict[str, Any]
            The dictionary obtained after a forward pass.
            The ``model_forward_dict`` is expected to have ``normalized_probs``,
            which is usually of size ``[batch_size, num_classes]``.
        """

        normalized_probs = model_forward_dict["normalized_probs"]
        labels = iter_dict["label"]
        labels_mask = iter_dict.get("label_mask")
        if labels_mask is None:
            labels_mask = torch.zeros_like(labels).type(torch.ByteTensor)

        normalized_probs = normalized_probs.cpu()
        labels = labels.cpu()

        assert normalized_probs.ndimension() == 2, self.msg_printer.fail(
            "The predicted probs should "
            "have 2 dimensions. The probs "
            "that you passed have shape "
            "{0}".format(normalized_probs.size())
        )

        assert labels.ndimension() == 2, self.msg_printer.fail(
            "The labels should have 2 dimensions. "
            "The labels that you passed have shape "
            "{0}".format(labels.size())
        )

        # TODO: k=1 for now; change it to support different values of k
        top_probs, top_indices = normalized_probs.topk(k=1, dim=1)

        # convert the top predicted indices to nested python lists
        top_indices_numpy = top_indices.cpu().numpy().tolist()

        # convert the labels to nested python lists
        true_labels_numpy = labels.cpu().numpy().tolist()

        labels_mask = labels_mask.tolist()

        confusion_mtrx, classes = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            true_tag_indices=true_labels_numpy,
            predicted_tag_indices=top_indices_numpy,
            masked_label_indices=labels_mask,
        )

        # For further confirmation on how this is calculated, I searched StackOverflow on
        # 18th of July 2019. This seems to be the correct way to calculate tps, fps and fns.
        # You can refer to https://stackoverflow.com/a/43331484/2704763

        # calculate tps
        tps = np.around(np.diag(confusion_mtrx), decimals=4)

        # calculate fps
        fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)

        # calculate fns
        fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

        tps = tps.tolist()
        fps = fps.tolist()
        fns = fns.tolist()

        class_tps_mapping = dict(zip(classes, tps))
        class_fps_mapping = dict(zip(classes, fps))
        class_fns_mapping = dict(zip(classes, fns))

        self.tp_counter = merge_dictionaries_with_sum(
            self.tp_counter, class_tps_mapping
        )
        self.fp_counter = merge_dictionaries_with_sum(
            self.fp_counter, class_fps_mapping
        )
        self.fn_counter = merge_dictionaries_with_sum(
            self.fn_counter, class_fns_mapping
        )
Example 6
    def calc_metric(self, lines: List[Line], labels: List[Label],
                    model_forward_dict: Dict[str, Any]) -> None:
        """ Updates the values being tracked for calculating the metric

        For Precision Recall FMeasure we update the true positive,
        false positive and false negative of the different classes
        being tracked

        Parameters
        ----------
        lines : List[Line]
            A list of lines

        labels : List[Label]
            A list of labels. This has to be the label used for classification.
            Refer to the documentation of ``Label`` for more information.

        model_forward_dict : Dict[str, Any]
            The dictionary obtained after a forward pass.
            The ``model_forward_dict`` is expected to have ``normalized_probs``,
            which is usually of size ``[batch_size, num_classes]``.
        """

        normalized_probs = model_forward_dict[self.normalized_probs_namespace]

        labels_tensor = []
        for label in labels:
            tokens = label.tokens[self.label_namespace]
            tokens = [tok.text for tok in tokens]
            numericalized_instance = self.label_numericalizer.numericalize_instance(
                instance=tokens)

            labels_tensor.extend(numericalized_instance)

        labels_tensor = torch.LongTensor(labels_tensor)
        labels_tensor = labels_tensor.view(-1, 1)
        labels_mask = torch.zeros_like(labels_tensor).type(torch.ByteTensor)

        normalized_probs = normalized_probs.cpu()

        assert normalized_probs.ndimension() == 2, self.msg_printer.fail(
            "The predicted probs should "
            "have 2 dimensions. The probs "
            "that you passed have shape "
            "{0}".format(normalized_probs.size()))

        assert labels_tensor.ndimension() == 2, self.msg_printer.fail(
            "The labels should have 2 dimensions. "
            "The labels that you passed have shape "
            "{0}".format(labels_tensor.size()))

        # TODO: k=1 for now; change it to support different values of k
        top_probs, top_indices = normalized_probs.topk(k=1, dim=1)

        # convert the top predicted indices to nested python lists
        top_indices_numpy = top_indices.cpu().numpy().tolist()

        # convert the labels to nested python lists
        true_labels_numpy = labels_tensor.cpu().numpy().tolist()

        labels_mask = labels_mask.tolist()

        (
            confusion_mtrx,
            classes,
        ) = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            true_tag_indices=true_labels_numpy,
            predicted_tag_indices=top_indices_numpy,
            true_masked_label_indices=labels_mask,
        )

        # For further confirmation on how this is calculated, I searched StackOverflow on
        # 18th of July 2019. This seems to be the correct way to calculate tps, fps and fns.
        # You can refer to https://stackoverflow.com/a/43331484/2704763

        # calculate tps
        tps = np.around(np.diag(confusion_mtrx), decimals=4)

        # calculate fps
        fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)

        # calculate fns
        fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

        tps = tps.tolist()
        fps = fps.tolist()
        fns = fns.tolist()

        class_tps_mapping = dict(zip(classes, tps))
        class_fps_mapping = dict(zip(classes, fps))
        class_fns_mapping = dict(zip(classes, fns))

        self.tp_counter = merge_dictionaries_with_sum(self.tp_counter,
                                                      class_tps_mapping)
        self.fp_counter = merge_dictionaries_with_sum(self.fp_counter,
                                                      class_fps_mapping)
        self.fn_counter = merge_dictionaries_with_sum(self.fn_counter,
                                                      class_fns_mapping)
Example 7
    def calc_metric(self, iter_dict: Dict[str, Any],
                    model_forward_dict: Dict[str, Any]) -> None:
        """

        Parameters
        ----------------
        iter_dict: Dict[str, Any]
            The ``iter_dict`` should have label key
            The ``label`` are gold labels for the batch
            They should have the shape ``[batch_size, time_steps]``
            where time_steps are the size of the sequence

        model_forward_dict: Dict[str, Any]
            The model_forward_dict should have ``predicted_tags`` key
            The ``predicted_tags`` are the best possible predicted tags for the batch
            They are List[List[int]] where the size is ``[batch_size, time_steps]``

        """
        labels = iter_dict.get("label", None)
        predicted_tags = model_forward_dict.get("predicted_tags",
                                                None)  # List[List[int]]

        if labels is None or predicted_tags is None:
            raise ValueError(
                f"While calling {self.__class__.__name__}, the iter_dict should "
                f"have a key called label and model_forward_dict "
                f"should have predicted_tags")

        labels = labels.cpu()

        labels_mask = iter_dict.get("label_mask")
        if labels_mask is None:
            labels_mask = torch.zeros_like(labels).type(torch.ByteTensor)

        assert labels.ndimension() == 2, self.msg_printer.fail(
            f"The labels for the metric {self.__class__.__name__} should have 2 dimensions. "
            f"The labels that you passed have the shape {labels.size()}")

        # build the confusion matrix from the gold and predicted tag indices
        confusion_mtrx, classes = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            true_tag_indices=labels.numpy().tolist(),
            predicted_tag_indices=predicted_tags,
            masked_label_indices=labels_mask.cpu().numpy().tolist(),
        )

        tps = np.around(np.diag(confusion_mtrx), decimals=4)
        fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)
        fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

        tps = tps.tolist()
        fps = fps.tolist()
        fns = fns.tolist()

        class_tps_mapping = dict(zip(classes, tps))
        class_fps_mapping = dict(zip(classes, fps))
        class_fns_mapping = dict(zip(classes, fns))

        self.tp_counter = merge_dictionaries_with_sum(self.tp_counter,
                                                      class_tps_mapping)
        self.fp_counter = merge_dictionaries_with_sum(self.fp_counter,
                                                      class_fps_mapping)
        self.fn_counter = merge_dictionaries_with_sum(self.fn_counter,
                                                      class_fns_mapping)
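All of these ``calc_metric`` methods only accumulate counts; as the docstrings note, precision, recall and F-measure are computed from the merged ``tp_counter``/``fp_counter``/``fn_counter`` dictionaries afterwards (presumably inside ``ClassificationMetricsUtils``, which is not shown in these listings). A hedged sketch of that final step using the standard formulas, with a hypothetical helper name:

    from typing import Dict, Tuple


    def precision_recall_fmeasure(
        tp_counter: Dict[int, float],
        fp_counter: Dict[int, float],
        fn_counter: Dict[int, float],
    ) -> Dict[int, Tuple[float, float, float]]:
        # Hypothetical helper: per-class precision, recall and F1 from the
        # accumulated true positive, false positive and false negative counts.
        metrics = {}
        for class_ in tp_counter:
            tp = tp_counter.get(class_, 0)
            fp = fp_counter.get(class_, 0)
            fn = fn_counter.get(class_, 0)
            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            fscore = (
                2 * precision * recall / (precision + recall)
                if (precision + recall) > 0
                else 0.0
            )
            metrics[class_] = (round(precision, 4), round(recall, 4), round(fscore, 4))
        return metrics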