def test_merge_two_full_dictionaries(self):
    a = {"a": 1, "b": 2}
    b = {"a": 2, "b": 4, "c": 0}
    expected = {"a": 3, "b": 6, "c": 0}
    a_b = merge_dictionaries_with_sum(a, b)
    assert a_b == expected
exp_name = f"{exp_name}_{fold_num}"
engine = setup_engine_once(
    experiment_name=exp_name,
    config_dict=copy.deepcopy(config),
    train_data_filepath=train_conll_filepath,
    test_data_filepath=test_conll_filepath,
)  # generating one path for every fold run
engine.run()

fold_tp_counter = engine.test_metric_calc.tp_counter
fold_fp_counter = engine.test_metric_calc.fp_counter
fold_fn_counter = engine.test_metric_calc.fn_counter

# accumulate the per-fold counters across folds
tp_counter = merge_dictionaries_with_sum(tp_counter, fold_tp_counter)
fp_counter = merge_dictionaries_with_sum(fp_counter, fold_fp_counter)
fn_counter = merge_dictionaries_with_sum(fn_counter, fold_fn_counter)

parscit_classname2idx = ParscitDataset.get_classname2idx()
idx2_classname = {
    idx: classname for classname, idx in parscit_classname2idx.items()
}
ignore_indices = [
    parscit_classname2idx["starting"],
    parscit_classname2idx["ending"],
    parscit_classname2idx["padding"],
]
classification_metrics_utils = ClassificationMetricsUtils(
def test_merge_dictionaries_one_zero(self):
    a = {"a": 0}
    b = {"a": 0, "b": 1}
    a_b = merge_dictionaries_with_sum(a, b)
    expected = {"a": 0, "b": 1}
    assert a_b == expected
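# For reference, a minimal sketch of the behaviour that ``merge_dictionaries_with_sum``
# is expected to have, inferred from the two tests above (this is not the library's
# actual implementation): keys present in either dictionary are kept, and values for
# common keys are added together.
from typing import Dict


def merge_dictionaries_with_sum_sketch(a: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
    merged = dict(a)
    for key, value in b.items():
        # add to the existing value if the key is already present, else start from 0
        merged[key] = merged.get(key, 0) + value
    return merged


# matches the expectation of test_merge_two_full_dictionaries
assert merge_dictionaries_with_sum_sketch(
    {"a": 1, "b": 2}, {"a": 2, "b": 4, "c": 0}
) == {"a": 3, "b": 6, "c": 0}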
def calc_metric(
    self,
    lines: List[Line],
    labels: List[SeqLabel],
    model_forward_dict: Dict[str, Any],
) -> None:
    """
    Parameters
    ----------------
    lines: List[Line]
        The list of lines
    labels: List[SeqLabel]
        The list of sequence labels
    model_forward_dict: Dict[str, Any]
        The model_forward_dict should have predicted tags for every namespace.
        The predicted tags are the best possible predicted tags for the batch.
        They are List[List[int]] where the size is ``[batch_size, time_steps]``
    """
    # get true labels for all namespaces
    namespace_to_true_labels = defaultdict(list)
    namespace_to_true_labels_mask = defaultdict(list)
    namespace_to_pred_labels_mask = defaultdict(list)

    for namespace in self.label_namespaces:
        # List[List[int]]
        predicted_tags = model_forward_dict.get(
            f"{self.predicted_tags_namespace_prefix}_{namespace}"
        )
        max_length = max([len(tags) for tags in predicted_tags])  # max num tokens
        numericalizer = self.datasets_manager.namespace_to_numericalizer[namespace]
        pred_tags_mask = numericalizer.get_mask_for_batch_instances(
            instances=predicted_tags
        ).tolist()
        namespace_to_pred_labels_mask[namespace] = pred_tags_mask

        for label in labels:
            true_labels = label.tokens[namespace]
            true_labels = [tok.text for tok in true_labels]
            true_labels = numericalizer.numericalize_instance(instance=true_labels)
            true_labels = numericalizer.pad_instance(
                numericalized_text=true_labels,
                max_length=max_length,
                add_start_end_token=False,
            )
            labels_mask = numericalizer.get_mask_for_instance(
                instance=true_labels
            ).tolist()
            namespace_to_true_labels[namespace].append(true_labels)
            namespace_to_true_labels_mask[namespace].append(labels_mask)

    for namespace in self.label_namespaces:
        labels_ = namespace_to_true_labels[namespace]
        labels_mask_ = namespace_to_true_labels_mask[namespace]
        pred_labels_mask_ = namespace_to_pred_labels_mask[namespace]
        # List[List[int]]
        predicted_tags = model_forward_dict.get(
            f"{self.predicted_tags_namespace_prefix}_{namespace}"
        )
        (
            confusion_mtrx,
            classes,
        ) = self.classification_metrics_utils.get_confusion_matrix_and_labels(
            true_tag_indices=labels_,
            predicted_tag_indices=predicted_tags,
            true_masked_label_indices=labels_mask_,
            pred_labels_mask=pred_labels_mask_,
        )

        # true positives sit on the diagonal of the confusion matrix;
        # false positives/negatives are the column/row sums minus the diagonal
        tps = np.around(np.diag(confusion_mtrx), decimals=4)
        fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)
        fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

        tps = tps.tolist()
        fps = fps.tolist()
        fns = fns.tolist()

        class_tps_mapping = dict(zip(classes, tps))
        class_fps_mapping = dict(zip(classes, fps))
        class_fns_mapping = dict(zip(classes, fns))

        self.tp_counter[namespace] = merge_dictionaries_with_sum(
            self.tp_counter.get(namespace, {}), class_tps_mapping
        )
        self.fp_counter[namespace] = merge_dictionaries_with_sum(
            self.fp_counter.get(namespace, {}), class_fps_mapping
        )
        self.fn_counter[namespace] = merge_dictionaries_with_sum(
            self.fn_counter.get(namespace, {}), class_fns_mapping
        )
def calc_metric(
    self, iter_dict: Dict[str, Any], model_forward_dict: Dict[str, Any]
) -> None:
    """ Updates the values being tracked for calculating the metric.
    For Precision, Recall and FMeasure we update the true positives, false positives
    and false negatives of the different classes being tracked.

    Parameters
    ----------
    iter_dict : Dict[str, Any]
        The ``iter_dict`` from the dataset is expected to have ``label``,
        which holds the labels for the instances and is usually of size
        ``[batch_size]``. Optionally there can be a ``label_mask`` of size
        ``[batch_size]``. The ``label_mask`` is 1 where the label should be
        masked and 0 where it should not.
    model_forward_dict : Dict[str, Any]
        The dictionary obtained after a forward pass. It is expected to have
        ``normalized_probs``, which is usually of size ``[batch_size, num_classes]``.
    """
    normalized_probs = model_forward_dict["normalized_probs"]
    labels = iter_dict["label"]
    labels_mask = iter_dict.get("label_mask")
    if labels_mask is None:
        labels_mask = torch.zeros_like(labels).type(torch.ByteTensor)

    normalized_probs = normalized_probs.cpu()
    labels = labels.cpu()

    assert normalized_probs.ndimension() == 2, self.msg_printer.fail(
        "The predicted probs should "
        "have 2 dimensions. The probs "
        "that you passed have shape "
        "{0}".format(normalized_probs.size())
    )

    assert labels.ndimension() == 2, self.msg_printer.fail(
        "The labels should have 2 dimensions. "
        "The labels that you passed have shape "
        "{0}".format(labels.size())
    )

    # TODO: for now k=1, change it to different number of ks
    top_probs, top_indices = normalized_probs.topk(k=1, dim=1)

    # convert to 1d numpy
    top_indices_numpy = top_indices.cpu().numpy().tolist()

    # convert labels to 1 dimension
    true_labels_numpy = labels.cpu().numpy().tolist()

    labels_mask = labels_mask.tolist()

    confusion_mtrx, classes = self.classification_metrics_utils.get_confusion_matrix_and_labels(
        true_tag_indices=true_labels_numpy,
        predicted_tag_indices=top_indices_numpy,
        masked_label_indices=labels_mask,
    )

    # For further confirmation on how this is calculated, see
    # https://stackoverflow.com/a/43331484/2704763 (looked up on 18 July 2019).
    # It describes the standard way to derive tps, fps and fns from a confusion matrix.

    # calculate tps
    tps = np.around(np.diag(confusion_mtrx), decimals=4)

    # calculate fps
    fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)

    # calculate fns
    fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

    tps = tps.tolist()
    fps = fps.tolist()
    fns = fns.tolist()

    class_tps_mapping = dict(zip(classes, tps))
    class_fps_mapping = dict(zip(classes, fps))
    class_fns_mapping = dict(zip(classes, fns))

    self.tp_counter = merge_dictionaries_with_sum(self.tp_counter, class_tps_mapping)
    self.fp_counter = merge_dictionaries_with_sum(self.fp_counter, class_fps_mapping)
    self.fn_counter = merge_dictionaries_with_sum(self.fn_counter, class_fns_mapping)
def calc_metric(
    self,
    lines: List[Line],
    labels: List[Label],
    model_forward_dict: Dict[str, Any],
) -> None:
    """ Updates the values being tracked for calculating the metric.
    For Precision, Recall and FMeasure we update the true positives, false positives
    and false negatives of the different classes being tracked.

    Parameters
    ----------
    lines : List[Line]
        A list of lines
    labels: List[Label]
        A list of labels. This has to be the label used for classification.
        Refer to the documentation of ``Label`` for more information.
    model_forward_dict : Dict[str, Any]
        The dictionary obtained after a forward pass. It is expected to have
        ``normalized_probs``, which is usually of size ``[batch_size, num_classes]``.
    """
    normalized_probs = model_forward_dict[self.normalized_probs_namespace]

    labels_tensor = []
    for label in labels:
        tokens = label.tokens[self.label_namespace]
        tokens = [tok.text for tok in tokens]
        numericalized_instance = self.label_numericalizer.numericalize_instance(
            instance=tokens
        )
        labels_tensor.extend(numericalized_instance)

    labels_tensor = torch.LongTensor(labels_tensor)
    labels_tensor = labels_tensor.view(-1, 1)
    labels_mask = torch.zeros_like(labels_tensor).type(torch.ByteTensor)

    normalized_probs = normalized_probs.cpu()

    assert normalized_probs.ndimension() == 2, self.msg_printer.fail(
        "The predicted probs should "
        "have 2 dimensions. The probs "
        "that you passed have shape "
        "{0}".format(normalized_probs.size())
    )

    assert labels_tensor.ndimension() == 2, self.msg_printer.fail(
        "The labels should have 2 dimensions. "
        "The labels that you passed have shape "
        "{0}".format(labels_tensor.size())
    )

    # TODO: for now k=1, change it to different number of ks
    top_probs, top_indices = normalized_probs.topk(k=1, dim=1)

    # convert to 1d numpy
    top_indices_numpy = top_indices.cpu().numpy().tolist()

    # convert labels to 1 dimension
    true_labels_numpy = labels_tensor.cpu().numpy().tolist()

    labels_mask = labels_mask.tolist()

    (
        confusion_mtrx,
        classes,
    ) = self.classification_metrics_utils.get_confusion_matrix_and_labels(
        true_tag_indices=true_labels_numpy,
        predicted_tag_indices=top_indices_numpy,
        true_masked_label_indices=labels_mask,
    )

    # For further confirmation on how this is calculated, see
    # https://stackoverflow.com/a/43331484/2704763 (looked up on 18 July 2019).
    # It describes the standard way to derive tps, fps and fns from a confusion matrix.

    # calculate tps
    tps = np.around(np.diag(confusion_mtrx), decimals=4)

    # calculate fps
    fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)

    # calculate fns
    fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

    tps = tps.tolist()
    fps = fps.tolist()
    fns = fns.tolist()

    class_tps_mapping = dict(zip(classes, tps))
    class_fps_mapping = dict(zip(classes, fps))
    class_fns_mapping = dict(zip(classes, fns))

    self.tp_counter = merge_dictionaries_with_sum(self.tp_counter, class_tps_mapping)
    self.fp_counter = merge_dictionaries_with_sum(self.fp_counter, class_fps_mapping)
    self.fn_counter = merge_dictionaries_with_sum(self.fn_counter, class_fns_mapping)
def calc_metric(
    self, iter_dict: Dict[str, Any], model_forward_dict: Dict[str, Any]
) -> None:
    """
    Parameters
    ----------------
    iter_dict: Dict[str, Any]
        The ``iter_dict`` should have a ``label`` key.
        The ``label`` holds the gold labels for the batch and has the shape
        ``[batch_size, time_steps]`` where ``time_steps`` is the length of the sequence.
    model_forward_dict: Dict[str, Any]
        The ``model_forward_dict`` should have a ``predicted_tags`` key.
        The ``predicted_tags`` are the best possible predicted tags for the batch.
        They are List[List[int]] where the size is ``[batch_size, time_steps]``
    """
    labels = iter_dict.get("label", None)
    predicted_tags = model_forward_dict.get("predicted_tags", None)  # List[List[int]]

    # check for missing inputs before touching the tensors
    if labels is None or predicted_tags is None:
        raise ValueError(
            f"While calling {self.__class__.__name__}, the iter_dict should "
            f"have a key called label and model_forward_dict "
            f"should have predicted_tags"
        )

    labels = labels.cpu()
    labels_mask = iter_dict.get("label_mask")
    if labels_mask is None:
        labels_mask = torch.zeros_like(labels).type(torch.ByteTensor)

    assert labels.ndimension() == 2, self.msg_printer.fail(
        f"The labels for the metric {self.__class__.__name__} should have 2 dimensions. "
        f"The labels that you passed have the shape {labels.size()}"
    )

    confusion_mtrx, classes = self.classification_metrics_utils.get_confusion_matrix_and_labels(
        true_tag_indices=labels.numpy().tolist(),
        predicted_tag_indices=predicted_tags,
        masked_label_indices=labels_mask.cpu().numpy().tolist(),
    )

    # true positives sit on the diagonal of the confusion matrix;
    # false positives/negatives are the column/row sums minus the diagonal
    tps = np.around(np.diag(confusion_mtrx), decimals=4)
    fps = np.around(np.sum(confusion_mtrx, axis=0) - tps, decimals=4)
    fns = np.around(np.sum(confusion_mtrx, axis=1) - tps, decimals=4)

    tps = tps.tolist()
    fps = fps.tolist()
    fns = fns.tolist()

    class_tps_mapping = dict(zip(classes, tps))
    class_fps_mapping = dict(zip(classes, fps))
    class_fns_mapping = dict(zip(classes, fns))

    self.tp_counter = merge_dictionaries_with_sum(self.tp_counter, class_tps_mapping)
    self.fp_counter = merge_dictionaries_with_sum(self.fp_counter, class_fps_mapping)
    self.fn_counter = merge_dictionaries_with_sum(self.fn_counter, class_fns_mapping)
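# A small, self-contained illustration (not part of the library) of the
# confusion-matrix arithmetic shared by the ``calc_metric`` methods above:
# true positives sit on the diagonal, false positives are the column sums
# minus the diagonal, and false negatives are the row sums minus the diagonal
# (see https://stackoverflow.com/a/43331484/2704763). The toy tag sequences
# below are made up for demonstration only.
import numpy as np
from sklearn.metrics import confusion_matrix

true_tags = [0, 0, 1, 1, 2, 2]
pred_tags = [0, 1, 1, 1, 2, 0]

# rows are true classes, columns are predicted classes
cm = confusion_matrix(true_tags, pred_tags)

tps = np.diag(cm)               # correctly predicted per class: [1, 2, 1]
fps = np.sum(cm, axis=0) - tps  # predicted as the class but wrong: [1, 1, 0]
fns = np.sum(cm, axis=1) - tps  # belonging to the class but missed: [1, 0, 1]

print(cm)
print(tps, fps, fns)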