def add_label_stats_to_dataframe(input_dataframe: pd.DataFrame,
                                 dataset_root_directory: Path,
                                 target_label_names: List[str]) -> pd.DataFrame:
    """
    Loops through all available subject IDs, generates ground-truth label statistics and updates input dataframe
    with the computed stats by adding new columns. In particular, it checks the overlapping regions between
    different structures and volume of labels.

    :param input_dataframe: Input Pandas dataframe object containing subjectIds and label names
    :param dataset_root_directory: Path to dataset root directory
    :param target_label_names: A list of label names that are used in label stat computations
    :return: The input dataframe (modified in place) with "LabelOverlap" and "LabelVolume (mL)" columns added.
    """
    dataset_sources = load_dataset_sources(input_dataframe,
                                           local_dataset_root_folder=dataset_root_directory,
                                           image_channels=["ct"],
                                           ground_truth_channels=target_label_names,
                                           mask_channel=None)

    # Iterate over subjects and compute overlap/volume statistics of their ground-truth labels
    for subject_id, dataset_source in dataset_sources.items():
        labels = io_util.load_labels_from_dataset_source(dataset_source)
        # labels[0] is the background channel; statistics are computed on foreground structures only
        overlap_stats = metrics_util.get_label_overlap_stats(labels=labels[1:, ...],
                                                             label_names=target_label_names)

        # Voxel spacing from the NIfTI header is needed to convert voxel counts into physical volume
        header = io_util.load_nifti_image(dataset_source.ground_truth_channels[0]).header
        volume_stats = metrics_util.get_label_volume(labels=labels[1:, ...],
                                                     label_names=target_label_names,
                                                     label_spacing=header.spacing)

        # Add the extracted label statistics as new columns, matching rows by subject and channel name.
        # The row mask is identical for both columns, so compute it once.
        subject_rows = input_dataframe.subject == subject_id
        for col_name, col_stats in zip(("LabelOverlap", "LabelVolume (mL)"), (overlap_stats, volume_stats)):
            input_dataframe.loc[subject_rows, col_name] = \
                input_dataframe.loc[subject_rows, "channel"].map(col_stats)

    return input_dataframe
def main(yaml_file_path: Path) -> None:
    """
    Invoke either by
      * specifying a model, '--model Lung'
      * or specifying dataset and normalization parameters separately: --azure_dataset_id=foo --norm_method=None
    In addition, the arguments '--image_channel' and '--gt_channel' must be specified (see below).

    :param yaml_file_path: Path to the YAML settings file used to build the model and runner configurations.
    """
    # Build the segmentation config; validation is disabled because only dataset/channel fields are needed here.
    config, runner_config, args = get_configs(
        SegmentationModelBase(should_validate=False), yaml_file_path)
    dataset_config = DatasetConfig(name=config.azure_dataset_id,
                                   local_folder=config.local_dataset,
                                   use_mounting=True)
    # NOTE(review): mount_context is never used in this span — presumably it must stay alive so the
    # mounted dataset remains accessible; confirm it is released later in the file.
    local_dataset, mount_context = dataset_config.to_input_dataset_local(
        workspace=runner_config.get_workspace())
    dataframe = pd.read_csv(local_dataset / DATASET_CSV_FILE_NAME)
    normalizer_config = NormalizeAndVisualizeConfig(**args)
    # --ignore_mask suppresses use of the model's configured mask channel entirely.
    actual_mask_channel = None if normalizer_config.ignore_mask else config.mask_id
    # Command-line channel arguments take precedence over the model's configured channels.
    image_channel = normalizer_config.image_channel or config.image_channels[0]
    if not image_channel:
        raise ValueError(
            "No image channel selected. Specify a model by name, or use the image_channel argument."
        )
    gt_channel = normalizer_config.gt_channel or config.ground_truth_ids[0]
    if not gt_channel:
        raise ValueError(
            "No GT channel selected. Specify a model by name, or use the gt_channel argument."
        )

    dataset_sources = load_dataset_sources(
        dataframe,
        local_dataset_root_folder=local_dataset,
        image_channels=[image_channel],
        ground_truth_channels=[gt_channel],
        mask_channel=actual_mask_channel)
    # Results go into the dataset folder itself unless a result subfolder was requested.
    result_folder = local_dataset
    if normalizer_config.result_folder is not None:
        result_folder = result_folder / normalizer_config.result_folder
    if not result_folder.is_dir():
        result_folder.mkdir()
    all_patient_ids = [*dataset_sources.keys()]
    # only_first == 0 means "process all patients"; otherwise restrict to the first N.
    if normalizer_config.only_first == 0:
        patient_ids_to_process = all_patient_ids
    else:
        patient_ids_to_process = all_patient_ids[:normalizer_config.only_first]
    # Record the exact command line and full configuration alongside the results, for reproducibility.
    args_file = result_folder / ARGS_TXT
    args_file.write_text(" ".join(sys.argv[1:]))
    config_file = result_folder / "config.txt"
    config_file.write_text(str(config))
    normalizer = PhotometricNormalization(config)
    # Plot the normalization result for each selected patient into the result folder.
    for patient_id in patient_ids_to_process:
        print(f"Starting to process patient {patient_id}")
        images = load_images_from_dataset_source(dataset_sources[patient_id])
        plotting.plot_normalization_result(images,
                                           normalizer,
                                           result_folder,
                                           result_prefix=image_channel)