Code Example #1
def test_classification_metrics_avg() -> None:
    hue1 = "H1"
    hue2 = "H2"
    m = MetricsDict(hues=[hue1, hue2], is_classification_metrics=True)
    m.add_metric("foo", 1.0)
    m.add_metric("foo", 2.0)
    # Perfect predictions for hue1, should give AUC == 1.0
    m.add_predictions(["S1", "S2"], np.array([0.0, 1.0]), np.array([0.0, 1.0]), hue=hue1)
    expected_hue1_auc = 1.0
    # Worst possible predictions for hue2, should give AUC == 0.0
    m.add_predictions(["S1", "S2"], np.array([1.0, 0.0]), np.array([0.0, 1.0]), hue=hue2)
    expected_hue2_auc = 0.0
    averaged = m.average(across_hues=False)
    g1_averaged = averaged.values(hue=hue1)
    assert MetricType.AREA_UNDER_ROC_CURVE.value in g1_averaged
    assert g1_averaged[MetricType.AREA_UNDER_ROC_CURVE.value] == [expected_hue1_auc]
    assert MetricType.AREA_UNDER_PR_CURVE.value in g1_averaged
    assert MetricType.SUBJECT_COUNT.value in g1_averaged
    assert g1_averaged[MetricType.SUBJECT_COUNT.value] == [2.0]
    default_averaged = averaged.values()
    assert default_averaged == {"foo": [1.5]}
    can_enumerate = list(averaged.enumerate_single_values())
    assert len(can_enumerate) >= 8
    assert can_enumerate[0] == (hue1, MetricType.AREA_UNDER_ROC_CURVE.value, 1.0)
    assert can_enumerate[-1] == (MetricsDict.DEFAULT_HUE_KEY, "foo", 1.5)

    g2_averaged = averaged.values(hue=hue2)
    assert MetricType.AREA_UNDER_ROC_CURVE.value in g2_averaged
    assert g2_averaged[MetricType.AREA_UNDER_ROC_CURVE.value] == [expected_hue2_auc]

    averaged_across_hues = m.average(across_hues=True)
    assert averaged_across_hues.get_hue_names() == [MetricsDict.DEFAULT_HUE_KEY]
    assert MetricType.AREA_UNDER_ROC_CURVE.value in averaged_across_hues.values()
    expected_averaged_auc = 0.5 * (expected_hue1_auc + expected_hue2_auc)
    assert averaged_across_hues.values()[MetricType.AREA_UNDER_ROC_CURVE.value] == [expected_averaged_auc]
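As an independent sanity check of the AUC values assumed above (not part of the original test), the perfect and fully inverted predictions can be scored directly with scikit-learn:

import numpy as np
from sklearn.metrics import roc_auc_score

# Labels and predictions mirror the hue1 (perfect) and hue2 (worst-case) calls to add_predictions above.
labels = np.array([0.0, 1.0])
assert roc_auc_score(labels, np.array([0.0, 1.0])) == 1.0  # perfect ranking gives AUC == 1.0
assert roc_auc_score(labels, np.array([1.0, 0.0])) == 0.0  # inverted ranking gives AUC == 0.0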
Code Example #2
def test_metrics_dict_add_integer() -> None:
    """
    Adding a scalar metric whose value is accidentally an integer should still store the metric.
    """
    m = MetricsDict()
    m.add_metric("foo", 1)
    assert "foo" in m.values()
    assert m.values()["foo"] == [1.0]
Code Example #3
def test_delete_hue() -> None:
    h1 = "a"
    h2 = "b"
    a = MetricsDict(hues=[h1, h2])
    a.add_metric("foo", 1.0, hue=h1)
    a.add_metric("bar", 2.0, hue=h2)
    a.delete_hue(h1)
    assert a.get_hue_names(include_default=False) == [h2]
    assert list(a.enumerate_single_values()) == [(h2, "bar", 2.0)]
Code Example #4
def test_delete_metric() -> None:
    """
    Test deleting a metric (all of its stored values) from the dictionary.
    """
    m = MetricsDict()
    m.add_metric(MetricType.LOSS, 1)
    assert m.values()[MetricType.LOSS.value] == [1.0]
    m.delete_metric(MetricType.LOSS)
    assert MetricType.LOSS.value not in m.values()
Code Example #5
def test_add_foreground_dice() -> None:
    g1 = "Liver"
    g2 = "Lung"
    ground_truth_ids = [BACKGROUND_CLASS_NAME, g1, g2]
    dice = [0.85, 0.75, 0.55]
    m = MetricsDict(hues=ground_truth_ids)
    for j, ground_truth_id in enumerate(ground_truth_ids):
        m.add_metric(MetricType.DICE, dice[j], hue=ground_truth_id)
    metrics.add_average_foreground_dice(m)
    assert m.get_single_metric(MetricType.DICE) == 0.5 * (dice[1] + dice[2])
Code Example #6
def test_metrics_store_mixed_hues() -> None:
    """
    Test that the metrics dict can handle both default and non-default hues.
    """
    m = MetricsDict(hues=["A", "B"])
    m.add_metric("foo", 1)
    m.add_metric("foo", 1, hue="B")
    m.add_metric("bar", 2, hue="A")
    assert list(m.enumerate_single_values()) == \
           [('A', 'bar', 2), ('B', 'foo', 1), (MetricsDict.DEFAULT_HUE_KEY, 'foo', 1)]
Code Example #7
def add_average_foreground_dice(metrics: MetricsDict) -> None:
    """
    If the given metrics dictionary contains an entry for Dice score, and only one value for the Dice score per class,
    then add an average Dice score for all foreground classes to the metrics dictionary (modified in place).
    :param metrics: The object that holds metrics. The average Dice score will be written back into this object.
    """
    all_dice = []
    for structure_name in metrics.get_hue_names(include_default=False):
        if structure_name != BACKGROUND_CLASS_NAME:
            all_dice.append(metrics.get_single_metric(MetricType.DICE, hue=structure_name))
    metrics.add_metric(MetricType.DICE, np.nanmean(all_dice).item())
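A minimal standalone check of the arithmetic performed by add_average_foreground_dice above, using plain numpy on the values from Code Example #5:

import numpy as np

foreground_dice = [0.75, 0.55]  # Dice for Liver and Lung; the background class is excluded
average_dice = np.nanmean(foreground_dice).item()
assert abs(average_dice - 0.5 * (0.75 + 0.55)) < 1e-12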
Code Example #8
def test_metrics_dict_to_string() -> None:
    """
    Test that the metrics dict is converted to a string correctly.
    """
    m = MetricsDict()
    m.add_metric("foo", 1.0)
    m.add_metric("bar", math.pi)
    info_df = pd.DataFrame(columns=MetricsDict.DATAFRAME_COLUMNS)
    info_df = info_df.append({MetricsDict.DATAFRAME_COLUMNS[0]: MetricsDict.DEFAULT_HUE_KEY,
                              MetricsDict.DATAFRAME_COLUMNS[1]: "foo: 1.0000, bar: 3.1416"}, ignore_index=True)
    assert m.to_string() == tabulate_dataframe(info_df)
    assert m.to_string(tabulate=False) == info_df.to_string(index=False)
Code Example #9
@pytest.mark.parametrize("hues", [None, ["A", "B"]])  # parametrization inferred from the assertions below
def test_metrics_dict_flatten(hues: Optional[List[str]]) -> None:
    m = MetricsDict(hues=hues)
    _hues = hues or [MetricsDict.DEFAULT_HUE_KEY] * 2
    m.add_metric("foo", 1.0, hue=_hues[0])
    m.add_metric("foo", 2.0, hue=_hues[1])
    m.add_metric("bar", 3.0, hue=_hues[0])
    m.add_metric("bar", 4.0, hue=_hues[1])

    if hues is None:
        average = m.average(across_hues=True)
        # We should be able to flatten out all the singleton values that the `average` operation returns
        all_values = list(average.enumerate_single_values())
        assert all_values == [(MetricsDict.DEFAULT_HUE_KEY, "foo", 1.5), (MetricsDict.DEFAULT_HUE_KEY, "bar", 3.5)]
        # When trying to flatten off a dictionary that has two values, this should fail:
        with pytest.raises(ValueError) as ex:
            list(m.enumerate_single_values())
        assert "only hold 1 item" in str(ex)
    else:
        average = m.average(across_hues=False)
        all_values = list(average.enumerate_single_values())
        assert all_values == [('A', 'foo', 1.0), ('A', 'bar', 3.0), ('B', 'foo', 2.0), ('B', 'bar', 4.0)]
Code Example #10
def test_metrics_dict1() -> None:
    """
    Test insertion of scalar values into a MetricsDict.
    """
    m = MetricsDict()
    assert m.get_hue_names() == [MetricsDict.DEFAULT_HUE_KEY]
    name = "foo"
    v1 = 2.7
    v2 = 3.14
    m.add_metric(name, v1)
    m.add_metric(name, v2)
    assert m.values()[name] == [v1, v2]
    with pytest.raises(ValueError) as ex:
        # noinspection PyTypeChecker
        m.add_metric(name, [1.0])  # type: ignore
    assert "Expected the metric to be a scalar" in str(ex)
    assert m.skip_nan_when_averaging[name] is False
    v3 = 3.0
    name2 = "bar"
    m.add_metric(name2, v3, skip_nan_when_averaging=True)
    assert m.skip_nan_when_averaging[name2] is True
    # Expected average: Metric "foo" averages over two values v1 and v2. For "bar", we only inserted one value anyhow
    average = m.average()
    mean_v1_v2 = mean([v1, v2])
    assert average.values() == {name: [mean_v1_v2], name2: [v3]}
    num_entries = m.num_entries()
    assert num_entries == {name: 2, name2: 1}
Code Example #11
def test_metrics_dict_average_metrics_averaging() -> None:
    """
    Test that averaging metrics avoids NaN values as expected.
    """
    m = MetricsDict()
    metric1 = "foo"
    v1 = 1.0
    m.add_metric(metric1, v1)
    m.add_metric(metric1, np.nan, skip_nan_when_averaging=True)
    metric2 = "bar"
    v2 = 2.0
    m.add_metric(metric2, v2)
    m.add_metric(metric2, np.nan, skip_nan_when_averaging=False)
    average = m.average()
    assert average.values()[metric1] == [v1]
    assert np.isnan(average.values()[metric2])
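The behavior exercised here mirrors the difference between numpy's nanmean and mean; a minimal standalone illustration (independent of MetricsDict):

import numpy as np

assert np.nanmean([1.0, np.nan]) == 1.0   # NaN is ignored, as with skip_nan_when_averaging=True
assert np.isnan(np.mean([2.0, np.nan]))   # NaN propagates, as with skip_nan_when_averaging=False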
Code Example #12
def test_aggregate_segmentation_metrics() -> None:
    """
    Test how per-epoch segmentation metrics are aggregated to compute foreground Dice and voxel count proportions.
    """
    g1 = "Liver"
    g2 = "Lung"
    ground_truth_ids = [BACKGROUND_CLASS_NAME, g1, g2]
    dice = [0.85, 0.75, 0.55]
    voxels_proportion = [0.85, 0.10, 0.05]
    loss = 3.14
    other_metric = 2.71
    m = MetricsDict(hues=ground_truth_ids)
    voxel_count = 200
    # Add 3 values per metric, chosen so that the average comes back to the value given in dice[j]
    for i in range(3):
        delta = (i - 1) * 0.05
        for j, ground_truth_id in enumerate(ground_truth_ids):
            m.add_metric(MetricType.DICE, dice[j] + delta, hue=ground_truth_id)
            m.add_metric(MetricType.VOXEL_COUNT, int(voxels_proportion[j] * voxel_count), hue=ground_truth_id)
        m.add_metric(MetricType.LOSS, loss + delta)
        m.add_metric("foo", other_metric)
    m.add_diagnostics("foo", "bar")
    aggregate = metrics.aggregate_segmentation_metrics(m)
    assert aggregate.diagnostics == m.diagnostics
    enumerated = list((g, s, v) for g, s, v in aggregate.enumerate_single_values())
    expected = [
        # Dice and voxel count per foreground structure should be retained during averaging
        (g1, MetricType.DICE.value, dice[1]),
        (g1, MetricType.VOXEL_COUNT.value, voxels_proportion[1] * voxel_count),
        # Proportion of foreground voxels is computed during averaging
        (g1, MetricType.PROPORTION_FOREGROUND_VOXELS.value, voxels_proportion[1]),
        (g2, MetricType.DICE.value, dice[2]),
        (g2, MetricType.VOXEL_COUNT.value, voxels_proportion[2] * voxel_count),
        (g2, MetricType.PROPORTION_FOREGROUND_VOXELS.value, voxels_proportion[2]),
        # Loss is present in the default metrics group, and should be retained.
        (MetricsDict.DEFAULT_HUE_KEY, MetricType.LOSS.value, loss),
        (MetricsDict.DEFAULT_HUE_KEY, "foo", other_metric),
        # Dice averaged across the foreground structures is added during the function call, as is proportion of voxels
        (MetricsDict.DEFAULT_HUE_KEY, MetricType.DICE.value, 0.5 * (dice[1] + dice[2])),
        (MetricsDict.DEFAULT_HUE_KEY, MetricType.PROPORTION_FOREGROUND_VOXELS.value,
         voxels_proportion[1] + voxels_proportion[2]),
    ]
    assert len(enumerated) == len(expected)
    # Numbers won't match up precisely because of rounding during averaging
    for (actual, e) in zip(enumerated, expected):
        assert actual[0:2] == e[0:2]
        assert actual[2] == pytest.approx(e[2])
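The expected voxel proportions above are consistent with dividing each structure's voxel count by the total voxel count of the crop; a small arithmetic check (illustrative only, not the aggregation code itself):

voxel_count = 200
voxels_proportion = [0.85, 0.10, 0.05]  # background, Liver, Lung
counts = [int(p * voxel_count) for p in voxels_proportion]  # [170, 20, 10]
total = sum(counts)
assert abs(counts[1] / total - 0.10) < 1e-12 and abs(counts[2] / total - 0.05) < 1e-12
# The default-hue proportion of foreground voxels is the sum over the foreground structures.
assert abs(counts[1] / total + counts[2] / total - 0.15) < 1e-12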
Code Example #13
def test_get_single_metric() -> None:
    h1 = "a"
    m = MetricsDict(hues=[h1])
    m1, v1 = ("foo", 1.0)
    m2, v2 = (MetricType.LOSS, 2.0)
    m.add_metric(m1, v1, hue=h1)
    m.add_metric(m2, v2)
    assert m.get_single_metric(m1, h1) == v1
    assert m.get_single_metric(m2) == v2
    with pytest.raises(KeyError) as ex1:
        m.get_single_metric(m1, "no such hue")
    assert "no such hue" in str(ex1)
    with pytest.raises(KeyError) as ex2:
        m.get_single_metric("no such metric", h1)
    assert "no such metric" in str(ex2)
    m.add_metric(m2, v2)
    with pytest.raises(ValueError) as ex3:
        m.get_single_metric(m2)
    assert "Expected a single entry" in str(ex3)
Code Example #14
class ModelTrainingStepsForSegmentation(
        ModelTrainingStepsBase[SegmentationModelBase, DeviceAwareModule]):
    """
    This class implements all steps necessary for training an image segmentation model during a single epoch.
    """
    def __init__(self, model_config: SegmentationModelBase,
                 train_val_params: TrainValidateParameters[DeviceAwareModule]):
        """
        Creates a new instance of the class.
        :param model_config: The configuration of a segmentation model.
        :param train_val_params: The parameters for training the model, including the optimizer and the data loaders.
        """
        super().__init__(model_config, train_val_params)
        self.example_to_save = np.random.randint(
            0, len(train_val_params.data_loader))
        self.pipeline = SegmentationForwardPass(
            model=self.train_val_params.model,
            model_config=self.model_config,
            batch_size=self.model_config.train_batch_size,
            optimizer=self.train_val_params.optimizer,
            in_training_mode=self.train_val_params.in_training_mode,
            criterion=self.compute_loss,
            gradient_scaler=train_val_params.gradient_scaler)
        self.metrics = MetricsDict(hues=[BACKGROUND_CLASS_NAME] +
                                   model_config.ground_truth_ids)

    def create_loss_function(self) -> torch.nn.Module:
        """
        Returns a torch module that computes a loss function.
        """
        return self.construct_loss_function(self.model_config)

    @classmethod
    def construct_loss_function(
            cls, model_config: SegmentationModelBase
    ) -> SupervisedLearningCriterion:
        """
        Returns a loss function from the model config; mixture losses are constructed as weighted combinations of
        other loss functions.
        """
        if model_config.loss_type == SegmentationLoss.Mixture:
            components = model_config.mixture_loss_components
            assert components is not None
            sum_weights = sum(component.weight for component in components)
            weights_and_losses = []
            for component in components:
                normalized_weight = component.weight / sum_weights
                loss_function = cls.construct_non_mixture_loss_function(
                    model_config, component.loss_type,
                    component.class_weight_power)
                weights_and_losses.append((normalized_weight, loss_function))
            return MixtureLoss(weights_and_losses)
        return cls.construct_non_mixture_loss_function(
            model_config, model_config.loss_type,
            model_config.loss_class_weight_power)

    @classmethod
    def construct_non_mixture_loss_function(
            cls, model_config: SegmentationModelBase,
            loss_type: SegmentationLoss,
            power: Optional[float]) -> SupervisedLearningCriterion:
        """
        :param model_config: model configuration to get some parameters from
        :param loss_type: type of loss function
        :param power: value for class_weight_power for the loss function
        :return: instance of loss function
        """
        if loss_type == SegmentationLoss.SoftDice:
            return SoftDiceLoss(class_weight_power=power)
        elif loss_type == SegmentationLoss.CrossEntropy:
            return CrossEntropyLoss(
                class_weight_power=power,
                smoothing_eps=model_config.label_smoothing_eps,
                focal_loss_gamma=None)
        elif loss_type == SegmentationLoss.Focal:
            return CrossEntropyLoss(
                class_weight_power=power,
                smoothing_eps=model_config.label_smoothing_eps,
                focal_loss_gamma=model_config.focal_loss_gamma)
        else:
            raise NotImplementedError(
                "Loss type {} is not implemented".format(loss_type))

    def forward_and_backward_minibatch(
            self, sample: Dict[str, Any], batch_index: int,
            epoch: int) -> ModelForwardAndBackwardsOutputs:
        """
        Runs training for a single minibatch of training data, and computes all metrics.
        :param sample: The batched sample on which the model should be trained.
        :param batch_index: The index of the present batch (supplied only for diagnostics).
        :param epoch: The number of the present epoch.
        """
        cropped_sample: CroppedSample = CroppedSample.from_dict(sample=sample)
        labels = self.model_config.get_gpu_tensor_if_possible(
            cropped_sample.labels_center_crop)

        mask = None if self.train_val_params.in_training_mode else cropped_sample.mask_center_crop
        forward_pass_result = self.pipeline.forward_pass_patches(
            patches=cropped_sample.image, labels=labels, mask=mask)
        # Clear the GPU cache between forward and backward passes to avoid possible out-of-memory
        torch.cuda.empty_cache()
        dice_for_all_classes = metrics.compute_dice_across_patches(
            segmentation=torch.tensor(
                forward_pass_result.segmentations).long(),
            ground_truth=labels,
            use_cuda=self.model_config.use_gpu,
            allow_multiple_classes_for_each_pixel=True).cpu().numpy()
        foreground_voxels = metrics_util.get_number_of_voxels_per_class(
            cropped_sample.labels)
        # The loss is a scalar, also when running the forward pass over multiple crops.
        # dice_for_all_classes has one row per crop.
        if forward_pass_result.loss is None:
            raise ValueError(
                "During training, the loss should always be computed, but the value is None."
            )
        loss = forward_pass_result.loss

        # store metrics per batch
        self.metrics.add_metric(MetricType.LOSS, loss)
        for i, ground_truth_id in enumerate(
                self.metrics.get_hue_names(include_default=False)):
            for b in range(dice_for_all_classes.shape[0]):
                self.metrics.add_metric(MetricType.DICE,
                                        dice_for_all_classes[b, i].item(),
                                        hue=ground_truth_id,
                                        skip_nan_when_averaging=True)
            self.metrics.add_metric(MetricType.VOXEL_COUNT,
                                    foreground_voxels[i],
                                    hue=ground_truth_id)
        # store diagnostics per batch
        center_indices = cropped_sample.center_indices
        if isinstance(center_indices, torch.Tensor):
            center_indices = center_indices.cpu().numpy()
        self.metrics.add_diagnostics(MetricType.PATCH_CENTER.value,
                                     np.copy(center_indices))
        if self.train_val_params.in_training_mode:
            # store the sample train patch from this epoch for visualization
            if batch_index == self.example_to_save and self.model_config.store_dataset_sample:
                _store_dataset_sample(self.model_config,
                                      self.train_val_params.epoch,
                                      forward_pass_result, cropped_sample)

        return ModelForwardAndBackwardsOutputs(
            loss=loss,
            logits=forward_pass_result.posteriors,
            labels=forward_pass_result.segmentations)

    def get_epoch_results_and_store(self,
                                    epoch_time_seconds: float) -> MetricsDict:
        """
        Assembles all training results that were achieved over all minibatches, writes them to Tensorboard and
        AzureML, and returns them as a MetricsDict object.
        :param epoch_time_seconds: For diagnostics, this is the total time in seconds for training the present epoch.
        :return: A dictionary that holds all metrics averaged over the epoch.
        """
        self.metrics.add_metric(MetricType.SECONDS_PER_EPOCH,
                                epoch_time_seconds)
        assert len(self.train_val_params.epoch_learning_rate
                   ) == 1, "Expected a single entry for learning rate."
        self.metrics.add_metric(MetricType.LEARNING_RATE,
                                self.train_val_params.epoch_learning_rate[0])
        result = metrics.aggregate_segmentation_metrics(self.metrics)
        metrics.store_epoch_metrics(self.azure_and_tensorboard_logger,
                                    self.df_logger,
                                    self.train_val_params.epoch, result,
                                    self.train_val_params.epoch_learning_rate,
                                    self.model_config)
        return result
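The weight handling described in construct_loss_function above can be illustrated with a short standalone sketch; the names below are hypothetical, and this is not the InnerEye MixtureLoss implementation:

# Normalize component weights so that they sum to 1, as construct_loss_function does
# before building the mixture loss.
raw_weights = [2.0, 3.0]
sum_weights = sum(raw_weights)
normalized_weights = [w / sum_weights for w in raw_weights]
assert normalized_weights == [0.4, 0.6]

# A mixture loss then returns the weighted sum of its component losses; here with two
# hypothetical per-component loss values.
component_losses = [1.0, 0.5]
mixture_loss = sum(w * l for w, l in zip(normalized_weights, component_losses))
assert abs(mixture_loss - 0.7) < 1e-12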