Example #1
    def test_epoch(
            test_epoch: int,
            run_recovery: Optional[RunRecovery]) -> Optional[MetricsDict]:
        pipeline = create_inference_pipeline(config, test_epoch, run_recovery)

        if pipeline is None:
            return None

        # for mypy
        assert isinstance(pipeline, ScalarInferencePipelineBase)

        ml_util.set_random_seed(config.get_effective_random_seed(),
                                "Model Testing")
        ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
            shuffle=False, batch_size=1, num_dataload_workers=0)

        logging.info(
            f"Starting to evaluate model from epoch {test_epoch} on {data_split.value} set."
        )
        metrics_dict = create_metrics_dict_from_config(config)
        for sample in ds:
            result = pipeline.predict(sample)
            # Since batch size is 1, we only have 1 item in each of the fields in result
            sample_id = result.subject_ids[0]
            label_gpu = result.labels
            model_output = result.model_outputs

            compute_scalar_metrics(metrics_dict, [sample_id], model_output,
                                   label_gpu, config.loss_type)
            logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

        average = metrics_dict.average(across_hues=False)
        logging.info(average.to_string())

        return metrics_dict

    def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]:
        pipeline = create_inference_pipeline(config=config,
                                             checkpoint_paths=checkpoint_paths)

        if pipeline is None:
            return None

        # for mypy
        assert isinstance(pipeline, ScalarInferencePipelineBase)

        ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing")
        ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
            shuffle=False,
            batch_size=1,
            num_dataload_workers=0
        )

        logging.info(f"Starting to evaluate model on {data_split.value} set.")
        metrics_dict = create_metrics_dict_for_scalar_models(config)
        for sample in ds:
            result = pipeline.predict(sample)
            model_output = result.posteriors
            label = result.labels.to(device=model_output.device)
            sample_id = result.subject_ids[0]
            compute_scalar_metrics(metrics_dict,
                                   subject_ids=[sample_id],
                                   model_output=model_output,
                                   labels=label,
                                   loss_type=config.loss_type)
            logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

        average = metrics_dict.average(across_hues=False)
        logging.info(average.to_string())

        return metrics_dict
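
The two variants above differ mainly in how the inference pipeline is built: the first resolves a checkpoint from an epoch number and an optional RunRecovery, the second takes explicit checkpoint paths. A minimal sketch of calling the checkpoint-based variant, assuming it is invoked inside the enclosing function where `config` and `data_split` are defined; the checkpoint path below is illustrative only.

    # Hypothetical call site for the checkpoint-based test_epoch above. The path is
    # illustrative; `config` and `data_split` are assumed to be in the enclosing scope.
    checkpoints = [Path("outputs/checkpoints/best_checkpoint.pt")]
    metrics = test_epoch(checkpoint_paths=checkpoints)
    if metrics is None:
        logging.warning("Inference pipeline could not be created; no metrics produced.")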
Example #3
    def update_metrics(self, subject_ids: List[str],
                       model_output: torch.Tensor,
                       labels: torch.Tensor) -> None:
        """
        Update the stored metrics from the provided model outputs and labels.
        """
        compute_scalar_metrics(self.metrics, subject_ids, model_output, labels,
                               self.model_config.loss_type)
def _compute_scalar_metrics(
        output_values_list: List[List[float]],
        labels: List[List[float]],
        is_classification: bool,
        hues: Optional[List[str]] = None) -> ScalarMetricsDict:
    model_output = torch.tensor(output_values_list)
    _labels = torch.tensor(labels)
    if machine_has_gpu:
        _labels = _labels.cuda()
        model_output = model_output.cuda()
    metrics_dict = ScalarMetricsDict(
        hues=hues, is_classification_metrics=is_classification)
    subject_ids = list(range(model_output.shape[0]))
    loss_type = (ScalarLoss.BinaryCrossEntropyWithLogits if is_classification
                 else ScalarLoss.MeanSquaredError)
    compute_scalar_metrics(metrics_dict,
                           subject_ids,
                           model_output,
                           _labels,
                           loss_type=loss_type)
    return metrics_dict
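
A minimal, hypothetical smoke test for the helper above: two subjects with one logit each and binary labels, exercising the classification branch. The values are illustrative; `average(across_hues=False)` and `to_string()` are the same calls used in the examples above.

# Hypothetical usage of _compute_scalar_metrics: two subjects, single hue,
# binary classification with logit outputs and illustrative values.
metrics = _compute_scalar_metrics(output_values_list=[[1.2], [-0.3]],
                                  labels=[[1.0], [0.0]],
                                  is_classification=True)
print(metrics.average(across_hues=False).to_string())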
Example #5
def classification_model_test(
        config: ScalarModelBase, data_split: ModelExecutionMode,
        checkpoint_paths: List[Path], model_proc: ModelProcessing,
        cross_val_split_index: int) -> InferenceMetricsForClassification:
    """
    The main testing loop for classification models. It runs a loop over all epochs for which testing should be done.
    It loads the model and datasets, then proceeds to test the model for all requested checkpoints.
    :param config: The model configuration.
    :param data_split: The name of the folder to store the results inside each epoch folder in the outputs_dir,
                       used mainly in model evaluation using different dataset splits.
    :param checkpoint_paths: Checkpoint paths to initialize model
    :param model_proc: whether we are testing an ensemble or single model
    :return: InferenceMetricsForClassification object that contains metrics related for all of the checkpoint epochs.
    """
    pipeline = create_inference_pipeline(config=config,
                                         checkpoint_paths=checkpoint_paths)
    if pipeline is None:
        raise ValueError("Inference pipeline could not be created.")
    # for mypy
    assert isinstance(pipeline, ScalarInferencePipelineBase)
    ml_util.set_random_seed(config.get_effective_random_seed(),
                            "Model Testing")
    ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
        shuffle=False, batch_size=1, num_dataload_workers=0)
    logging.info(f"Starting to evaluate model on {data_split.value} set.")
    results_folder = config.outputs_folder / get_best_epoch_results_path(
        data_split, model_proc)
    os.makedirs(str(results_folder), exist_ok=True)
    metrics_dict = create_metrics_dict_for_scalar_models(config)
    output_logger: Optional[DataframeLogger] = DataframeLogger(
        csv_path=results_folder / MODEL_OUTPUT_CSV)

    for sample in ds:
        result = pipeline.predict(sample)
        model_output = result.posteriors
        label = result.labels.to(device=model_output.device)
        sample_id = result.subject_ids[0]
        if output_logger:
            for i in range(len(config.target_names)):
                output_logger.add_record({
                    LoggingColumns.Patient.value: sample_id,
                    LoggingColumns.Hue.value: config.target_names[i],
                    LoggingColumns.Label.value: label[0][i].item(),
                    LoggingColumns.ModelOutput.value: model_output[0][i].item(),
                    LoggingColumns.CrossValidationSplitIndex.value: cross_val_split_index
                })

        compute_scalar_metrics(metrics_dict,
                               subject_ids=[sample_id],
                               model_output=model_output,
                               labels=label,
                               loss_type=config.loss_type)
        logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")
    average = metrics_dict.average(across_hues=False)
    logging.info(average.to_string())
    if isinstance(metrics_dict, ScalarMetricsDict):
        csv_file = results_folder / SUBJECT_METRICS_FILE_NAME
        logging.info(
            f"Writing {data_split.value} metrics to file {str(csv_file)}")
        # If we are running inference after a training run, the validation set metrics may have been written
        # during train time. If this is not the case, or we are running on the test set, create the metrics
        # file.
        if not csv_file.exists():
            df_logger = DataframeLogger(csv_file)
            # For an ensemble, record the default cross-validation split index; otherwise record
            # which fold produced this prediction.
            cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
                else cross_val_split_index
            metrics_dict.store_metrics_per_subject(
                df_logger=df_logger,
                mode=data_split,
                cross_validation_split_index=cv_index,
                epoch=BEST_EPOCH_FOLDER_NAME)
            # write to disk
            df_logger.flush()

    if output_logger:
        output_logger.flush()

    return InferenceMetricsForClassification(metrics=metrics_dict)
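
A sketch of a possible call into classification_model_test for a single (non-ensemble) model. `ModelExecutionMode.TEST` and `ModelProcessing.DEFAULT` are assumed enum members (only ENSEMBLE_CREATION appears above), and the checkpoint path is illustrative only.

# Hypothetical invocation; config is a trained ScalarModelBase instance, and the
# enum members and checkpoint path below are assumptions for illustration.
inference_metrics = classification_model_test(
    config=config,
    data_split=ModelExecutionMode.TEST,
    checkpoint_paths=[Path("outputs/checkpoints/best_checkpoint.pt")],
    model_proc=ModelProcessing.DEFAULT,
    cross_val_split_index=DEFAULT_CROSS_VALIDATION_SPLIT_INDEX)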