def test_metric_tracker_best():
    """
    Check that ``MetricTracker.best()`` returns the expected best record.

    Every entry of ``METRICS`` is added to a tracker created with
    ``patient=None``; the record returned by ``best()`` is then compared,
    field by field, with the expected values (epoch 3).
    :return:
    """
    tracker = MetricTracker(patient=None)

    for record in METRICS:
        tracker.add_metric(**record)

    # Expected content of the best record.
    expected_epoch = 3
    expected_train_metric = {"acc": 0.85}
    expected_train_target = ModelTargetMetric(metric_name="acc",
                                              metric_value=0.85)
    expected_validation_metric = {"acc": 0.60}
    expected_validation_target = ModelTargetMetric(metric_name="acc",
                                                   metric_value=0.60)

    best = tracker.best()

    ASSERT.assertEqual(expected_epoch, best.epoch)

    ASSERT.assertDictEqual(expected_train_metric, best.train_metric)
    ASSERT.assertDictEqual(expected_validation_metric, best.validation_metric)

    ASSERT.assertEqual(expected_train_target.name,
                       best.train_model_target_metric.name)
    ASSERT.assertEqual(expected_train_target.value,
                       best.train_model_target_metric.value)

    ASSERT.assertEqual(expected_validation_target.name,
                       best.validation_model_target_metric.name)
    ASSERT.assertEqual(expected_validation_target.value,
                       best.validation_model_target_metric.value)
def test_metric_tracker_patient():
    """
    Check early stopping of a ``MetricTracker`` created with ``patient=1``.

    Entries of ``METRICS`` are added one at a time. After each addition
    ``early_stopping`` must be true only for epochs greater than 4, and the
    loop ends as soon as it reports true. The best record is then compared
    with the expected values (epoch 3).
    :return:
    """
    tracker = MetricTracker(patient=1)

    for record in METRICS:
        tracker.add_metric(**record)
        epoch = record["epoch"]

        # Early stopping is expected only once the epoch is past 4.
        check = ASSERT.assertTrue if epoch > 4 else ASSERT.assertFalse
        check(tracker.early_stopping(epoch))

        # Queried a second time, exactly as a training loop would, to decide
        # whether to stop feeding metrics.
        if tracker.early_stopping(epoch):
            break

    # Expected content of the best record.
    expected_epoch = 3
    expected_train_metric = {"acc": 0.85}
    expected_train_target = ModelTargetMetric(metric_name="acc",
                                              metric_value=0.85)
    expected_validation_metric = {"acc": 0.60}
    expected_validation_target = ModelTargetMetric(metric_name="acc",
                                                   metric_value=0.60)

    best = tracker.best()

    ASSERT.assertEqual(expected_epoch, best.epoch)

    ASSERT.assertDictEqual(expected_train_metric, best.train_metric)
    ASSERT.assertDictEqual(expected_validation_metric, best.validation_metric)

    ASSERT.assertEqual(expected_train_target.name,
                       best.train_model_target_metric.name)
    ASSERT.assertEqual(expected_train_target.value,
                       best.train_model_target_metric.value)

    ASSERT.assertEqual(expected_validation_target.name,
                       best.validation_model_target_metric.name)
    ASSERT.assertEqual(expected_validation_target.value,
                       best.validation_model_target_metric.value)
def from_file(cls, file_path: str):
    """
    Load a metric tracker from a JSON file.

    The file must contain the keys ``"patient"``, ``"_best_epoch"`` and
    ``"metric_tracker_dict"``; the latter maps an epoch (stored as a string
    key) to one serialized tracker item.

    :param file_path: path of the file to read
    :return: a new instance of ``cls`` populated from the file content
    """

    def _target_metric_from(raw):
        # Rebuild a ModelTargetMetric from its serialized form.
        return ModelTargetMetric(metric_name=raw["_metric_name"],
                                 metric_value=raw["_metric_value"])

    with open(file_path, mode="r", encoding="utf-8") as f:
        tracker = cls()
        payload = json.load(f)

    tracker.patient = payload["patient"]
    tracker._best_epoch = payload["_best_epoch"]

    for raw_epoch, record in payload["metric_tracker_dict"].items():
        # JSON object keys are strings; the tracker dict is keyed by int.
        epoch_key = int(raw_epoch)

        train_metric = record["train_metric"]
        validation_metric = record["validation_metric"]

        train_target = _target_metric_from(
            record["train_model_target_metric"])
        validation_target = _target_metric_from(
            record["validation_model_target_metric"])

        tracker.metric_tracker_dict[epoch_key] = MetricTrackerItem(
            epoch=record["epoch"],
            train_metric=train_metric,
            train_model_target_metric=train_target,
            validation_metric=validation_metric,
            validation_model_target_metric=validation_target)

    return tracker
def __call__(self,
             model_outputs: MRCNerOutput,
             golden_labels: Dict[str, Tensor]) -> Tuple[Dict, ModelTargetMetric]:
    """
    Compute the metric for one set of model outputs.

    :param model_outputs: model outputs; they are passed as a whole to the
        label decoder and their ``mask`` attribute is read
    :param golden_labels: dict of golden labels; only the
        ``"match_position_labels"`` entry is read here
    :return: the metric dict and the target metric built from its
        ``MRCF1Metric.F1_OVERALL`` entry
    """
    predicted_matches = self.model_label_decoder.decode_label_index(
        model_outputs=model_outputs)
    golden_matches = golden_labels["match_position_labels"]

    # Compute the overall f1; the mask is detached before it is handed over.
    detached_mask = model_outputs.mask.detach()
    f1_dict = self.mrc_f1_metric(prediction_match_labels=predicted_matches,
                                 gold_match_labels=golden_matches,
                                 mask=detached_mask)

    overall = ModelTargetMetric(metric_name=MRCF1Metric.F1_OVERALL,
                                metric_value=f1_dict[MRCF1Metric.F1_OVERALL])
    return f1_dict, overall
def __call__(self,
             model_outputs: _DemoOutputs,
             golden_labels: Tensor) -> Tuple[Dict, ModelTargetMetric]:
    """
    Compute accuracy for one set of model outputs.

    :param model_outputs: model outputs, decoded into label indices by the
        label decoder
    :param golden_labels: golden labels the predictions are compared with
    :return: the accuracy metric dict and the target metric built from its
        ``AccMetric.ACC`` entry
    """
    predicted_indices = self._label_decoder.decode_label_index(
        model_outputs=model_outputs)

    # No mask is applied for this metric.
    acc_dict = self._acc(prediction_labels=predicted_indices,
                         gold_labels=golden_labels,
                         mask=None)

    acc_target = ModelTargetMetric(AccMetric.ACC, acc_dict[AccMetric.ACC])
    return acc_dict, acc_target
def metric(self) -> Tuple[Dict, ModelTargetMetric]:
    """
    Return the current metric of the wrapped accuracy metric.

    :return: the metric dict read from ``self._metric.metric`` and the target
        metric built from its ``AccMetric.ACC`` entry
    """
    current = self._metric.metric
    acc_target = ModelTargetMetric(metric_name=AccMetric.ACC,
                                   metric_value=current[AccMetric.ACC])
    return current, acc_target
def metric(self) -> Tuple[Dict, ModelTargetMetric]:
    """
    Merge the current metric of every per-event-type f1 metric.

    Each entry of ``self._event_type_f1`` contributes the dict produced by
    ``_event_metric_from`` for its event type; all of them are merged into
    one dict.

    :return: the merged metric dict and the target metric built from its
        ``F1Metric.F1_OVERALL`` entry
    """
    merged: Dict[str, float] = {}

    for event_type, f1_metric in self._event_type_f1.items():
        merged.update(
            EventF1MetricAdapter._event_metric_from(event_type,
                                                    f1_metric.metric))

    overall = ModelTargetMetric(metric_name=F1Metric.F1_OVERALL,
                                metric_value=merged[F1Metric.F1_OVERALL])
    return merged, overall
def __call__(self,
             model_outputs: NerModelOutputs,
             golden_labels: Tensor) -> Tuple[Dict, ModelTargetMetric]:
    """
    Compute the span f1 metric for one set of model outputs.

    :param model_outputs: model outputs; they are passed as a whole to the
        label decoder and their ``mask`` attribute is forwarded to the metric
    :param golden_labels: golden labels the predictions are compared with
    :return: the metric dict and the target metric built from its
        ``SpanF1Metric.F1_OVERALL`` entry
    """
    decoded_labels = self.model_label_decoder.decode_label_index(
        model_outputs=model_outputs)

    f1_dict = self.span_f1_metric(prediction_labels=decoded_labels,
                                  gold_labels=golden_labels,
                                  mask=model_outputs.mask)

    overall = ModelTargetMetric(metric_name=SpanF1Metric.F1_OVERALL,
                                metric_value=f1_dict[SpanF1Metric.F1_OVERALL])
    return f1_dict, overall
def __call__(self,
             model_outputs: ACSAModelOutputs,
             golden_labels: Tensor) -> Tuple[Dict, ModelTargetMetric]:
    """
    Compute accuracy for one set of model outputs.

    Logits and golden labels are detached and moved to the CPU before
    decoding and metric computation.

    :param model_outputs: model outputs; only ``logits`` is read
    :param golden_labels: golden labels the predictions are compared with
    :return: the metric dict and the target metric built from its
        ``AccMetric.ACC`` entry
    """
    # Re-wrap the CPU logits so the decoder receives the type it expects.
    outputs_on_cpu = ACSAModelOutputs(
        logits=model_outputs.logits.detach().cpu())
    labels_on_cpu = golden_labels.detach().cpu()

    decoded_labels = self._label_decoder.decode_label_index(
        model_outputs=outputs_on_cpu)

    # No mask is applied for this metric.
    acc_dict = self._metric(prediction_labels=decoded_labels,
                            gold_labels=labels_on_cpu,
                            mask=None)

    acc_target = ModelTargetMetric(metric_name=AccMetric.ACC,
                                   metric_value=acc_dict[AccMetric.ACC])
    return acc_dict, acc_target
def __call__(self,
             model_outputs: EventModelOutputs,
             golden_labels: Tensor) -> Tuple[Dict, ModelTargetMetric]:
    """
    Compute the f1 metric of every tracked event type, plus the overall one.

    For each entry of ``self._event_type_f1`` a mask is built from the event
    type indices and the corresponding f1 metric is evaluated on the masked
    samples. The per-type results are converted with ``_event_metric_from``
    and merged into a single dict.

    :param model_outputs: model outputs; ``logits`` and ``event_type`` are
        read, and both must be 1-dimensional
    :param golden_labels: golden labels, must be 1-dimensional
    :return: the merged metric dict and the target metric built from its
        ``F1Metric.F1_OVERALL`` entry
    :raises AssertionError: if ``logits``, ``event_type`` or
        ``golden_labels`` is not 1-dimensional
    """
    metrics = dict()

    logits = model_outputs.logits.detach().cpu()
    event_type_indices = model_outputs.event_type.detach().cpu()
    golden_labels = golden_labels.detach().cpu()

    assert logits.dim() == 1, \
        f"logits shape 应该是 (B,), 现在 dim 是: {logits.dim()}"
    # Report the dimension of event_type itself, not that of logits.
    assert event_type_indices.dim() == 1, \
        f"event_type shape 应该是 (B,), 现在 dim 是: {event_type_indices.dim()}"
    assert golden_labels.dim() == 1, \
        f"golden_labels shape 应该是 (B,), 现在 dim 是: {golden_labels.dim()}"

    # NOTE(review): a 0.5 threshold suggests these values are probabilities
    # rather than raw logits — confirm against the model.
    predictions = (logits > 0.5).long()

    for event_type, f1_metric in self._event_type_f1.items():

        if event_type == EventF1MetricAdapter.__OVERALL:
            # Overall: keep every sample whose event type is not the
            # vocabulary's unk entry.
            negative_event_type_index = self._event_type_vocabulary.index(
                self._event_type_vocabulary.unk)
            mask = (event_type_indices != negative_event_type_index).long()
        else:
            # Single event type: keep only the samples of that type.
            event_type_index = self._event_type_vocabulary.index(event_type)
            mask = (event_type_indices == event_type_index).long()

        event_type_metric = f1_metric(prediction_labels=predictions,
                                      gold_labels=golden_labels,
                                      mask=mask)
        event_type_metric = EventF1MetricAdapter._event_metric_from(
            event_type, event_type_metric)
        metrics.update(event_type_metric)

    target_metric = ModelTargetMetric(
        metric_name=F1Metric.F1_OVERALL,
        metric_value=metrics[F1Metric.F1_OVERALL])

    return metrics, target_metric
def metric(self) -> Tuple[Dict, ModelTargetMetric]:
    """
    Return the current metric of the wrapped MRC f1 metric.

    :return: the metric dict read from ``self.mrc_f1_metric.metric`` and the
        target metric built from its ``MRCF1Metric.F1_OVERALL`` entry
    """
    current = self.mrc_f1_metric.metric
    overall = ModelTargetMetric(metric_name=MRCF1Metric.F1_OVERALL,
                                metric_value=current[MRCF1Metric.F1_OVERALL])
    return current, overall
def metric(self) -> Tuple[Dict, ModelTargetMetric]:
    """
    Return the current metric of the wrapped accuracy metric.

    :return: the metric dict read from ``self._acc.metric`` and the target
        metric built from its ``AccMetric.ACC`` entry
    """
    current = self._acc.metric
    acc_value = current[AccMetric.ACC]
    return current, ModelTargetMetric(AccMetric.ACC, acc_value)
""" 测试 metric tracker Authors: panxu([email protected]) Date: 2020/05/28 14:46:00 """ import os from easytext.tests import ASSERT from easytext.tests import ROOT_PATH from easytext.trainer import MetricTracker from easytext.metrics import ModelTargetMetric METRICS = [{"epoch": 1, "train_metric": {"acc": 0.81}, "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.81), "validation_metric": {"acc": 0.46}, "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.46)}, {"epoch": 2, "train_metric": {"acc": 0.83}, "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.83), "validation_metric": {"acc": 0.48}, "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.48)}, {"epoch": 3, "train_metric": {"acc": 0.85}, "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.85), "validation_metric": {"acc": 0.60}, "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.60)}, {"epoch": 4, "train_metric": {"acc": 0.89}, "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.89),
def metric(self) -> Tuple[Dict, ModelTargetMetric]:
    """
    Return the current metric of the wrapped span f1 metric.

    :return: the metric dict read from ``self.span_f1_metric.metric`` and the
        target metric built from its ``SpanF1Metric.F1_OVERALL`` entry
    """
    # Read the metric a single time so that the returned dict and the target
    # value are guaranteed to come from the same read.
    metric_dict = self.span_f1_metric.metric

    target_metric = ModelTargetMetric(
        metric_name=SpanF1Metric.F1_OVERALL,
        metric_value=metric_dict[SpanF1Metric.F1_OVERALL])

    return metric_dict, target_metric