Example 1
def create(
    image_source: Path,
    label_source: Path,
    image_target_dir: Path,
    label_target_dir: Path,
    df: pd.DataFrame,
    fg_only: bool = False,
):
    image_target_dir.mkdir(parents=True, exist_ok=True)
    label_target_dir.mkdir(parents=True, exist_ok=True)

    case_id = image_source.stem.rsplit('-', 1)[0]
    case_id_check = label_source.stem.rsplit('-', 1)[0]
    assert case_id == case_id_check, f"case ids not matching, found image {case_id} and label {case_id_check}"

    df_case = df.loc[df['public_id'] == case_id]
    instances = {}
    for row in df_case.itertuples():
        _cls = int(row.label_code)
        if _cls == 0:  # background has label code 0 and lab id 0
            continue

        if fg_only:
            _cls = 1
        elif _cls == -1:
            _cls = 5

        assert 0 < _cls < 6, f"Unexpected class {_cls}"
        # classes map to the range 0-4 (always 0 if fg_only)
        instances[str(row.label_id)] = _cls - 1
    save_json({"instances": instances}, label_target_dir / f"{case_id}.json")

    shutil.copy2(image_source, image_target_dir / f"{case_id}_0000.nii.gz")
    shutil.copy2(label_source, label_target_dir / f"{case_id}.nii.gz")
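
The excerpts in this collection omit their import preamble. A minimal sketch of the shared imports: the nndet paths follow the ones Example 16 spells out explicitly, while the remaining lines are inferred from usage in later examples and should be treated as assumptions.

# Assumed shared preamble; the nndet import paths follow Example 16,
# everything else is inferred from how the snippets use these names.
import os
import shutil
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import SimpleITK as sitk
from loguru import logger

from nndet.io import save_json, load_json, load_pickle
from nndet.utils.info import maybe_verbose_iterable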
Example 2
def generate_image(image_dir, label_dir, idx):
    random.seed(idx)
    np.random.seed(idx)

    logger.info(f"Generating case_{idx}")
    selected_size = np.random.randint(object_size[0], object_size[1])
    selected_class = np.random.randint(0, 2)

    data = np.random.rand(*image_size)
    mask = np.zeros_like(data)

    top_left = [
        np.random.randint(0, image_size[i] - selected_size) for i in range(dim)
    ]

    if selected_class == 0:
        slicing = tuple([slice(tp, tp + selected_size) for tp in top_left])
        data[slicing] = data[slicing] + 0.4
        data = data.clip(0, 1)
        mask[slicing] = 1
    elif selected_class == 1:
        slicing = tuple([slice(tp, tp + selected_size) for tp in top_left])

        inner_slicing = [
            slice(tp + object_width, tp + selected_size - object_width)
            for tp in top_left
        ]
        if len(inner_slicing) == 3:
            inner_slicing[0] = slice(0, image_size[0])
        inner_slicing = tuple(inner_slicing)

        object_mask = np.zeros_like(mask).astype(bool)
        object_mask[slicing] = 1
        object_mask[inner_slicing] = 0

        data[object_mask] = data[object_mask] + 0.4
        data = data.clip(0, 1)
        mask[object_mask] = 1
    else:
        raise NotImplementedError

    if dim == 2:
        data = data[None]
        mask = mask[None]

    data_itk = sitk.GetImageFromArray(data)
    mask_itk = sitk.GetImageFromArray(mask)
    mask_meta = {
        "instances": {
            "1": selected_class
        },
    }
    sitk.WriteImage(data_itk, str(image_dir / f"case_{idx}_0000.nii.gz"))
    sitk.WriteImage(mask_itk, str(label_dir / f"case_{idx}.nii.gz"))
    save_json(mask_meta, label_dir / f"case_{idx}.json")
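
Besides the shared imports, generate_image reads a handful of module-level settings that the excerpt omits. A hypothetical configuration block follows; the names match the globals referenced above, while the concrete values are purely illustrative.

# Hypothetical settings for generate_image; values are illustrative only.
dim = 3                      # 2 or 3; for dim == 2 a channel axis is prepended
image_size = [64] * dim      # edge lengths of the generated noise volume
object_size = (8, 16)        # sampling range for the square's edge length
object_width = 2             # border thickness of the hollow "SquareHole"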
Example 3
def run_prep(source_data: Path, source_label: Path, target_data_dir: Path,
             target_label_dir: Path):
    case_id = source_data.stem.rsplit('_', 1)[0]

    shutil.copy(source_data, target_data_dir / f"{case_id}_0000.nii.gz")
    shutil.copy(source_label, target_label_dir /
                f"{case_id}.nii.gz")  # rename label file to match data
    label_itk = sitk.ReadImage(str(source_label))

    label_np = sitk.GetArrayFromImage(label_itk)
    # assumes instance ids are consecutive and start at 1
    instances = {int(_id + 1): 0 for _id in range(int(label_np.max()))}
    save_json({"instances": instances}, target_label_dir / f"{case_id}.json")
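
Note that range(int(label_np.max())) assumes the instance ids in the mask are consecutive and start at 1; Example 6 derives them from np.unique instead, which also tolerates gaps. For a mask containing ids 1-3, the file written above holds:

# Content of <case_id>.json for a mask with instance ids 1, 2 and 3;
# every instance maps to class 0 because the task is foreground vs background.
expected = {"instances": {"1": 0, "2": 0, "3": 0}}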
Example 4
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task017_CADA"

    # setup raw paths
    source_data_dir = task_data_dir / "raw" / "train_dataset"
    if not source_data_dir.is_dir():
        raise RuntimeError(
            f"{source_data_dir} should contain the raw data but does not exist."
        )
    source_label_dir = task_data_dir / "raw" / "train_mask_images"
    if not source_label_dir.is_dir():
        raise RuntimeError(
            f"{source_label_dir} should contain the raw labels but does not exist."
        )

    # setup raw splitted dirs
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)

    # prepare dataset info
    meta = {
        "name": "CADA",
        "task": "Task017_CADA",
        "target_class": None,
        "test_labels": False,
        "labels": {
            "0": "aneurysm"
        },
        "modalities": {
            "0": "CT"
        },
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # prepare data & label
    case_ids = [(p.stem).rsplit('_', 1)[0]
                for p in source_data_dir.glob("*.nii.gz")]
    print(f"Found {len(case_ids)} case ids")
    for cid in maybe_verbose_iterable(case_ids):
        run_prep(
            source_data=source_data_dir / f"{cid}_orig.nii.gz",
            source_label=source_label_dir / f"{cid}_labeledMasks.nii.gz",
            target_data_dir=target_data_dir,
            target_label_dir=target_label_dir,
        )
Example 5
def convert_raw(task, overwrite, ov):
    task_name_full = get_task(task, name=True)
    task_num, task_name = task_name_full[4:].split('_', 1)
    new_task_name_full = f"Task{task_num}FG_{task_name}"

    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])
    print(cfg.pretty())

    source_splitted_dir = Path(cfg["host"]["splitted_4d_output_dir"])
    target_splitted_dir = Path(str(source_splitted_dir).replace(task_name_full, new_task_name_full))
    if target_splitted_dir.is_dir() and overwrite:
        shutil.rmtree(target_splitted_dir)
    target_splitted_dir.mkdir(parents=True)

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(target_splitted_dir.parent / "convert_cls2fg.log", level="DEBUG")

    # update dataset_info
    source_data_info = Path(cfg["host"]["data_dir"])
    data_info = load_dataset_info(source_data_info)
    data_info.pop("labels")
    data_info["labels"] = {"0": "fg"}
    data_info["task"] = new_task_name_full
    save_json(data_info, target_splitted_dir.parent / "dataset.json", indent=4)

    for postfix in ["Tr", "Ts"]:
        source_image_dir = source_splitted_dir / f"images{postfix}"
        source_label_dir = source_splitted_dir / f"labels{postfix}"

        if not source_image_dir.is_dir():
            logger.info(f"{source_image_dir} is not a dir. Skipping it.")
            continue

        # copy images and labels
        shutil.copytree(source_image_dir, target_splitted_dir / f"images{postfix}")
        shutil.copytree(source_label_dir, target_splitted_dir / f"labels{postfix}")

        # remap properties file to foreground class
        target_label_dir = target_splitted_dir / f"labels{postfix}"
        for f in target_label_dir.glob("*.json"):
            props = load_json(f)
            props["instances"] = {key: 0 for key in props["instances"]}
            save_json(props, f)
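
The loop rewrites only the class values in each copied properties file and leaves the instance ids untouched, e.g. (illustrative values):

# One labels* properties file before and after the foreground remap above.
before = {"instances": {"1": 2, "2": 0, "3": 4}}
after = {"instances": {"1": 0, "2": 0, "3": 0}}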
Example 6
def prepare_image(
    case_id: str,
    base_dir: Path,
    mask_dir: Path,
    raw_splitted_dir: Path,
):
    logger.info(f"Processing {case_id}")
    root_data_dir = base_dir / case_id
    patient_data_dir = []
    for root, dirs, files in os.walk(root_data_dir, topdown=False):
        if any([f.endswith(".dcm") for f in files]):
            patient_data_dir.append(Path(root))
    assert len(patient_data_dir) == 1
    patient_data_dir = patient_data_dir[0]

    reader = sitk.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(str(patient_data_dir))
    reader.SetFileNames(dicom_names)
    data_itk = reader.Execute()

    patient_label_dir = mask_dir / case_id
    label_path = [
        p for p in patient_label_dir.iterdir()
        if p.is_file() and p.name.endswith(".nii.gz")
    ]
    assert len(label_path) == 1
    label_path = label_path[0]

    mask = load_sitk_as_array(label_path)[0]
    instances = np.unique(mask)
    instances = instances[instances > 0]
    meta = {"instances": {str(int(i)): 0 for i in instances}}
    meta["original_path_data"] = str(patient_data_dir)
    meta["original_path_label"] = str(label_path)

    save_json(meta, raw_splitted_dir / "labelsTr" / f"{case_id}.json")

    sitk.WriteImage(
        data_itk,
        str(raw_splitted_dir / "imagesTr" / f"{case_id}_0000.nii.gz"))
    shutil.copy(label_path,
                raw_splitted_dir / "labelsTr" / f"{case_id}.nii.gz")
Example 7
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task019FG_ADAM"
    
    # setup raw paths
    source_data_dir = task_data_dir / "raw" / "ADAM_release_subjs"
    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")

    # setup raw splitted dirs
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)

    # prepare dataset info
    meta = {
        "name": "ADAM",
        "task": "Task019FG_ADAM",
        "target_class": None,
        "test_labels": False,
        "labels": {"0": "Aneurysm"}, # since we are running FG vs BG this is not completely correct
        "modalities": {"0": "Structured", "1": "TOF"},
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # prepare data
    case_ids = [p.stem for p in source_data_dir.iterdir() if p.is_dir()]
    print(f"Found {len(case_ids)} case ids")
    for cid in maybe_verbose_iterable(case_ids):
        run_prep_fg_v_bg(
            case_id=cid,
            source_data=source_data_dir,
            target_data_dir=target_data_dir,
            target_label_dir=target_label_dir,
        )
Example 8
    assert len(image_paths) == len(label_paths)

    meta = {
        "name": "RibFracFG",
        "task": "Task020FG_RibFrac",
        "target_class": None,
        "test_labels": False,
        "labels": {
            "0": "fracture"
        },  # since we are running FG vs BG this is not completely correct
        "modalities": {
            "0": "CT"
        },
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    for ip, lp in maybe_verbose_iterable(list(zip(image_paths, label_paths))):
        create(
            image_source=ip,
            label_source=lp,
            image_target_dir=target_data_dir,
            label_target_dir=target_label_dir,
            df=df,
            fg_only=True,
        )


if __name__ == '__main__':
    main()
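
Example 8 starts mid-function: the omitted part has to collect matching image and label paths plus the per-instance table that create from Example 1 consumes (columns public_id, label_id, label_code). A hedged sketch of such a preamble; the directory layout and the CSV name are assumptions.

# Hypothetical preamble for the fragment above; paths and the CSV name are
# assumptions, only the required DataFrame columns come from Example 1.
task_data_dir = Path(os.getenv("det_data")) / "Task020FG_RibFrac"
image_paths = sorted((task_data_dir / "raw" / "images").glob("*.nii.gz"))
label_paths = sorted((task_data_dir / "raw" / "labels").glob("*.nii.gz"))
df = pd.read_csv(task_data_dir / "raw" / "ribfrac-info.csv")
target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"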
Example 9
def prepare_detection_label(
    case_id: str,
    label_dir: Path,
    things_classes: Sequence[int],
    stuff_classes: Sequence[int],
    min_size: float = 0,
    min_vol: float = 0,
):
    if (label_dir / f"{case_id}.json").is_file():
        logger.info(f"Found existing case {case_id} -> skipping")
        return
    logger.info(f"Processing {case_id}")
    seg_itk = load_sitk(label_dir / f"{case_id}.nii.gz")
    spacing = np.asarray(seg_itk.GetSpacing())[::-1]
    seg = sitk.GetArrayFromImage(seg_itk)

    # prepare stuff information
    stuff_seg = np.zeros_like(seg)
    if stuff_classes:
        for new_class, old_class in enumerate(stuff_classes, start=1):
            stuff_seg[seg == old_class] = new_class
        stuff_seg_itk = copy_meta_data_itk(seg_itk,
                                           sitk.GetImageFromArray(stuff_seg))
        sitk.WriteImage(stuff_seg_itk,
                        str(label_dir / f"{case_id}_stuff.nii.gz"))

    # prepare things information
    structure = np.ones([3] * seg.ndim)
    things_seg = np.copy(seg)
    things_seg[stuff_seg > 0] = 0  # remove all stuff classes from segmentation

    instances_not_filtered, _ = label(things_seg, structure=structure)
    final_mapping = {}
    if instances_not_filtered.max() > 0:
        boxes = get_bbox_np(instances_not_filtered[None])["boxes"]
        box_sizes = box_size_np(boxes)

        instance_ids = np.unique(instances_not_filtered)
        instance_ids = instance_ids[instance_ids > 0]

        assert len(instance_ids) == len(boxes)
        isotropic_axes = list(range(seg.ndim))
        isotropic_axes.pop(np.argmax(spacing))  # drop lowest-resolution axis
        instances = np.zeros_like(instances_not_filtered)

        start_id = 1
        for iid, bsize in zip(instance_ids, box_sizes):
            bsize_world = bsize * spacing
            instance_mask = (instances_not_filtered == iid)
            instance_vol = instance_mask.sum()

            if all(bsize_world[isotropic_axes] > min_size) and (instance_vol > min_vol):
                instances[instance_mask] = start_id

                single_idx = np.argwhere(instance_mask)[0]
                semantic_class = int(seg[tuple(single_idx)])
                final_mapping[start_id] = things_classes.index(semantic_class)

                start_id += 1
    else:
        instances = np.zeros_like(instances_not_filtered)

    final_instances_itk = copy_meta_data_itk(seg_itk,
                                             sitk.GetImageFromArray(instances))
    sitk.WriteImage(final_instances_itk, str(label_dir / f"{case_id}.nii.gz"))
    save_json({"instances": final_mapping}, label_dir / f"{case_id}.json")

    sitk.WriteImage(seg_itk, str(label_dir / f"{case_id}_orig.nii.gz"))
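
The things/stuff split hinges on scipy.ndimage.label with a full connectivity structure, which turns a semantic segmentation into one instance per connected component. A tiny self-contained example of that step:

# Connected-component labelling as used above, on a toy 2D segmentation;
# scipy.ndimage.label treats every non-zero voxel as foreground.
import numpy as np
from scipy.ndimage import label

things_seg = np.array([
    [2, 2, 0, 0],
    [0, 0, 2, 2],
])

instances, n = label(things_seg, structure=np.ones((3, 3)))
print(n)  # 1 -> full connectivity merges the diagonally touching blobs
instances, n = label(things_seg)  # default cross-shaped connectivity
print(n)  # 2 -> the blobs stay separate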
Example 10
        if do_volume_ranking:
            for postfix in ["Tr", "Ts"]:
                if (label_dir := splitted_dir / f"labels{postfix}").is_dir():
                    ranking = []
                    for case_id in tqdm(
                        [f.stem for f in label_dir.glob("*.json")]):
                        instances = load_sitk_as_array(label_dir /
                                                       f"{case_id}.nii.gz")[0]
                        instance_ids, instance_counts = np.unique(
                            instances, return_counts=True)
                        cps = [
                            np.argwhere(instances == iid)[0].tolist()
                            for iid in instance_ids[1:]
                        ]
                        assert len(instance_ids) - 1 == len(cps)
                        tmp = [{
                            "case_id": str(case_id),
                            "instance_id": int(iid),
                            "vol": int(vol),
                            "cp": list(cp)[::-1]
                        } for iid, vol, cp in zip(instance_ids[1:],
                                                  instance_counts[1:], cps)]
                        ranking.extend(tmp)
                    ranking = sorted(ranking, key=lambda x: x["vol"])
                    save_json(ranking,
                              splitted_dir / f"volume_ranking_{postfix}.json")
                else:
                    logger.info(
                        f"Did not find dir {label_dir} for volume ranking")
Example 11
def main():
    """
    Generate an example dataset for nnDetection to test the installation or
    experiment with ideas.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--full',
        help="Increase size of dataset. "
        "Default sizes train/test 10/10 and full 1000/1000.",
        action='store_true',
    )
    parser.add_argument(
        '--num_processes',
        help="Use multiprocessing to create dataset.",
        type=int,
        default=0,
    )
    args = parser.parse_args()

    full = args.full
    num_processes = args.num_processes

    num_images_tr = 1000 if full else 10
    num_images_ts = 1000 if full else 10

    meta = {
        "task": f"Task000D{dim}_Example",
        "name": "Example",
        "target_class": None,
        "test_labels": True,
        "labels": {
            "0": "Square",
            "1": "SquareHole"
        },
        "modalities": {
            "0": "MRI"
        },
        "dim": dim,
    }

    # setup paths
    data_task_dir = Path(os.getenv("det_data")) / meta["task"]
    data_task_dir.mkdir(parents=True, exist_ok=True)
    save_json(meta, data_task_dir / "dataset.json")

    raw_splitted_dir = data_task_dir / "raw_splitted"
    images_tr_dir = raw_splitted_dir / "imagesTr"
    images_tr_dir.mkdir(parents=True, exist_ok=True)
    labels_tr_dir = raw_splitted_dir / "labelsTr"
    labels_tr_dir.mkdir(parents=True, exist_ok=True)
    images_ts_dir = raw_splitted_dir / "imagesTs"
    images_ts_dir.mkdir(parents=True, exist_ok=True)
    labels_ts_dir = raw_splitted_dir / "labelsTs"
    labels_ts_dir.mkdir(parents=True, exist_ok=True)

    if num_processes == 0:
        for idx in range(num_images_tr):
            generate_image(
                images_tr_dir,
                labels_tr_dir,
                idx,
            )

        for idx in range(num_images_tr, num_images_tr + num_images_ts):
            generate_image(
                images_ts_dir,
                labels_ts_dir,
                idx,
            )
    else:
        logger.info("Using multiprocessing to create example dataset.")
        with Pool(processes=num_processes) as p:
            p.starmap(
                generate_image,
                zip(
                    repeat(images_tr_dir),
                    repeat(labels_tr_dir),
                    range(num_images_tr),
                ))
        with Pool(processes=num_processes) as p:
            p.starmap(
                generate_image,
                zip(
                    repeat(images_ts_dir),
                    repeat(labels_ts_dir),
                    range(num_images_tr, num_images_tr + num_images_ts),
                ))
Example 12
def prepare_case(case_id,
                 data_dirs,
                 ktrans_dirs,
                 t2_masks,
                 df_labels,
                 df_masks,
                 data_target,
                 label_target,
                 ):
    try:
        logger.info(f"Preparing {case_id}")

        tmp_dir = data_dirs / case_id
        _dirs = [f for f in tmp_dir.iterdir() if f.is_dir()]
        assert len(_dirs) == 1
        data_dir = _dirs[0]  # iterdir already yields the full path

        df_mask_case = df_masks[df_masks['T2'].str.contains(case_id)]
        assert len(df_mask_case) == 1

        t2_mask_file = df_mask_case.iloc[0]["T2"]
        assert f"{case_id}" in t2_mask_file
        t2_series_id = int(t2_mask_file.rsplit(".", 2)[0].rsplit('_', 1)[1])

        adc_mask_file = df_mask_case.iloc[0]["ADC"]
        assert f"{case_id}" in adc_mask_file
        if case_id == "ProstateX-0025":
            # case 0025 has a 7a inside the table
            adc_series_id = 7
            assert adc_mask_file.endswith("7a.nii.gz")
        elif case_id == "ProstateX-0113":
            # the table lists series 9, but series 9 is not an ADC file,
            # so series 10 is used instead
            adc_series_id = int(adc_mask_file.rsplit(".", 2)[0].rsplit('_', 1)[1])
            assert adc_series_id == 9
            adc_series_id = 10
        else:
            adc_series_id = int(adc_mask_file.rsplit(".", 2)[0].rsplit('_', 1)[1])

        # T2
        t2_dir = [f for f in data_dir.glob("*t2*") if f.name.startswith(f"{t2_series_id}.")]
        assert len(t2_dir) == 1
        t2_data_itk = load_dicom_series_sitk(t2_dir[0])

        # ADC
        adc_dir = [f for f in data_dir.glob("*ADC*") if f.name.startswith(f"{adc_series_id}.")]
        assert len(adc_dir) == 1
        adc_data_itk = load_dicom_series_sitk(adc_dir[0])

        # PD-W
        pdw_dir = sorted(data_dir.glob("* PD *"))[-1]
        pdw_data_itk = load_dicom_series_sitk(pdw_dir)

        # k-trans
        ktrans_dir = ktrans_dirs / case_id
        ktrans_data_itk = load_sitk(ktrans_dir / f"{case_id}-Ktrans.mhd")

        # resample data to t2 (only early fusion is currently supported)
        resampler = sitk.ResampleImageFilter()  # default linear
        resampler.SetReferenceImage(t2_data_itk)
        adc_data_itk_res = resampler.Execute(adc_data_itk)
        pdw_data_itk_res = resampler.Execute(pdw_data_itk)
        ktrans_data_itk_res = resampler.Execute(ktrans_data_itk)

        # prepare mask
        mask_paths = list(t2_masks.glob(f"{case_id}*"))
        fids = [int([l for l in mp.name.split("-") if "Finding" in l][0][7:]) for mp in mask_paths]
        mask_itk = load_sitk(str(mask_paths[0]))
        mask = sitk.GetArrayFromImage(mask_itk)
        mask[mask > 0] = 1

        for idx, mp in enumerate(mask_paths[1:], start=2):
            _mask = load_sitk_as_array(str(mp))[0]
            mask[_mask > 0] = idx

        mask_final = sitk.GetImageFromArray(mask)
        mask_final = copy_meta_data_itk(t2_data_itk, mask_final)

        df_case = df_labels.loc[df_labels['ProxID'] == case_id]
        instances = {}
        for row in df_case.itertuples():
            if row.fid in fids:
                instances[fids.index(int(row.fid)) + 1] = int(row.ClinSig)
            else:
                logger.info(f"Found removed fid {row.fid} in {case_id}")

        # save
        sitk.WriteImage(t2_data_itk, str(data_target / f"{case_id}_0000.nii.gz"))
        sitk.WriteImage(adc_data_itk_res, str(data_target / f"{case_id}_0001.nii.gz"))
        sitk.WriteImage(pdw_data_itk_res, str(data_target / f"{case_id}_0002.nii.gz"))
        sitk.WriteImage(ktrans_data_itk_res, str(data_target / f"{case_id}_0003.nii.gz"))
        sitk.WriteImage(mask_final, str(label_target / f"{case_id}.nii.gz"))
        save_json({"instances": instances}, label_target / f"{case_id}.json")
    except Exception as e:
        logger.error(f"Case {case_id} failed with {e} and {traceback.format_exc()}")
Example 13
def main():
    """
    Prepare Task021_ProstateX from the T2, ADC, PD-W and Ktrans sequences.
    Only the provided T2 masks are used as ground truth.
    """
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task021_ProstateX"

    # setup raw paths
    source_data_dir = task_data_dir / "raw"
    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")

    source_data = source_data_dir / "PROSTATEx"
    source_masks = source_data_dir / "rcuocolo-PROSTATEx_masks-e344452"
    source_ktrans = source_data_dir / "ktrains"
    csv_labels = source_data_dir / "ProstateX-TrainingLesionInformationv2" / "ProstateX-Findings-Train.csv"
    csv_masks = source_data_dir / "rcuocolo-PROSTATEx_masks-e344452" / "Files" / "Image_list.csv"

    data_target = task_data_dir / "raw_splitted" / "imagesTr"
    data_target.mkdir(parents=True, exist_ok=True)
    label_target = task_data_dir / "raw_splitted" / "labelsTr"
    label_target.mkdir(parents=True, exist_ok=True)

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    logger.add(data_target.parent.parent / "prepare.log", level="DEBUG")

    base_masks = source_masks / "Files" / "Masks"
    t2_masks = base_masks / "T2"

    df_labels = pd.read_csv(csv_labels)
    df_masks = pd.read_csv(csv_masks)
    case_ids = [f.stem.split("-", 2)[:2] for f in t2_masks.glob("*nii.gz")]
    case_ids = list(set([f"{c[0]}-{c[1]}" for c in case_ids]))
    logger.info(f"Found {len(case_ids)} cases")

    # save meta
    logger.info("Saving dataset info")
    dataset_info = {
        "name": "ProstateX",
        "task": "Task021_ProstateX",

        "target_class": None,
        "test_labels": False,

        "labels": {
            "0": "clinically_significant",
            "1": "clinically_insignificant",
        },
        "modalities": {
            "0": "T2",
            "1": "ADC",
            "2": "PD-W",
            "3": "Ktrans"
        },
        "dim": 3,
        "info": "Ground Truth: T2 Masks; \n"
                "Modalities: T2, ADC, PD-W, Ktrans \n;"
                "Classes: clinically significant = 1, insignificant = 0 \n"
                "Keep: ProstateX-0025 '10-28-2011-MR prostaat kanker detectie WDSmc MCAPRODETW-19047'\n"
                "Masks\n"
                "https://github.com/rcuocolo/PROSTATEx_masks\n"
                "Github hash: e3444521e70cd5e8d405f4e9a6bc08312df8afe7"
    }
    save_json(dataset_info, task_data_dir / "dataset.json")

    # prepare labels and data
    for cid in maybe_verbose_iterable(case_ids):
        prepare_case(cid,
                     data_dirs=source_data,
                     ktrans_dirs=source_ktrans,
                     t2_masks=t2_masks,
                     df_labels=df_labels,
                     df_masks=df_masks,
                     data_target=data_target,
                     label_target=label_target,
                     )

    # with Pool(processes=6) as p:
    #     p.starmap(prepare_case, zip(case_ids,
    #                                 repeat(source_data),
    #                                 repeat(source_ktrans),
    #                                 repeat(t2_masks),
    #                                 repeat(df_labels),
    #                                 repeat(df_masks),
    #                                 repeat(data_target),
    #                                 repeat(label_target),
    #                                 ))

    # create test split
    create_test_split(task_data_dir / "raw_splitted",
                      num_modalities=len(dataset_info["modalities"]),
                      test_size=0.3,
                      random_state=0,
                      shuffle=True,
                      )
Example 14
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task011_Kits"
    source_data_dir = task_data_dir / "raw"

    if not source_data_dir.is_dir():
        raise RuntimeError(
            f"{source_data_dir} should contain the raw data but does not exist."
        )

    splitted_dir = task_data_dir / "raw_splitted"
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(task_data_dir / "prepare.log", level="DEBUG")

    # save meta info
    dataset_info = {
        "name": "Kits",
        "task": "Task011_Kits",
        "target_class": None,
        "test_labels": True,
        "seg2det_stuff": [
            1,
        ],  # define stuff classes: kidney
        "seg2det_things": [
            2,
        ],  # define things classes: tumor
        "min_size": 3.,
        "labels": {
            "0": "lesion"
        },
        "labels_stuff": {
            "1": "kidney"
        },
        "modalities": {
            "0": "CT"
        },
        "dim": 3,
    }
    save_json(dataset_info, task_data_dir / "dataset.json")

    # prepare cases
    cases = [str(c.name) for c in source_data_dir.iterdir() if c.is_dir()]
    for c in maybe_verbose_iterable(cases):
        logger.info(f"Copy case {c}")
        case_id = int(c.split("_")[-1])
        if case_id < 210:
            shutil.copy(source_data_dir / c / "imaging.nii.gz",
                        target_data_dir / f"{c}_0000.nii.gz")
            shutil.copy(source_data_dir / c / "segmentation.nii.gz",
                        target_label_dir / f"{c}.nii.gz")

    # create an artificial test split
    create_test_split(
        splitted_dir=splitted_dir,
        num_modalities=1,
        test_size=0.3,
        random_state=0,
        shuffle=True,
    )
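
The seg2det_stuff/seg2det_things entries of this dataset info correspond to the stuff_classes/things_classes arguments of prepare_detection_label from Example 9. A sketch of how a downstream step might wire them together; the call site is an assumption, not part of this script.

# Hypothetical wiring of the Kits dataset info into Example 9's helper:
# kidney (1) becomes the stuff class, tumor (2) the single things class.
for c in maybe_verbose_iterable(cases):
    prepare_detection_label(
        case_id=c,
        label_dir=target_label_dir,
        things_classes=dataset_info["seg2det_things"],  # [2]
        stuff_classes=dataset_info["seg2det_stuff"],    # [1]
        min_size=dataset_info["min_size"],
    )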
Example 15
def main():
    det_data_dir = Path(os.getenv("det_data"))
    task_data_dir = det_data_dir / "Task025_LymphNodes"
    source_data_base = task_data_dir / "raw"
    if not source_data_base.is_dir():
        raise RuntimeError(
            f"{source_data_base} should contain the raw data but does not exist."
        )

    raw_splitted_dir = task_data_dir / "raw_splitted"
    (raw_splitted_dir / "imagesTr").mkdir(parents=True, exist_ok=True)
    (raw_splitted_dir / "labelsTr").mkdir(parents=True, exist_ok=True)
    (raw_splitted_dir / "imagesTs").mkdir(parents=True, exist_ok=True)
    (raw_splitted_dir / "labelsTs").mkdir(parents=True, exist_ok=True)

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="DEBUG")
    logger.add(raw_splitted_dir.parent / "prepare.log", level="DEBUG")

    meta = {
        "name": "Lymph Node TCIA",
        "task": "Task025_LymphNodes",
        "target_class": None,
        "test_labels": True,
        "labels": {
            "0": "LymphNode",
        },
        "modalities": {
            "0": "CT",
        },
        "dim": 3,
    }

    save_json(meta, raw_splitted_dir.parent / "dataset.json")

    base_dir = source_data_base / "CT Lymph Nodes"
    mask_dir = source_data_base / "MED_ABD_LYMPH_MASKS"

    case_ids = sorted([p.name for p in base_dir.iterdir() if p.is_dir()])
    logger.info(f"Found {len(case_ids)} cases in {base_dir}")

    for cid in maybe_verbose_iterable(case_ids):
        prepare_image(
            case_id=cid,
            base_dir=base_dir,
            mask_dir=mask_dir,
            raw_splitted_dir=raw_splitted_dir,
        )

    # with Pool(processes=6) as p:
    #     p.starmap(
    #         prepare_image,
    #         zip(
    #             case_ids,
    #             repeat(base_dir),
    #             repeat(mask_dir),
    #             repeat(raw_splitted_dir)
    #         )
    #     )

    create_test_split(
        raw_splitted_dir,
        num_modalities=len(meta["modalities"]),
        test_size=0.3,
        random_state=0,
        shuffle=True,
    )
Example 16
def boxes2nii():
    import os
    import argparse
    from pathlib import Path

    import numpy as np
    import SimpleITK as sitk
    from loguru import logger

    from nndet.io import save_json, load_pickle
    from nndet.io.paths import get_task, get_training_dir
    from nndet.utils.info import maybe_verbose_iterable

    parser = argparse.ArgumentParser()
    parser.add_argument('task',
                        type=str,
                        help="Task id e.g. Task12_LIDC OR 12 OR LIDC")
    parser.add_argument('model',
                        type=str,
                        help="model name, e.g. RetinaUNetV0")
    parser.add_argument('-f',
                        '--fold',
                        type=int,
                        help="fold to sweep.",
                        default=0,
                        required=False)
    parser.add_argument('-o',
                        '--overwrites',
                        type=str,
                        nargs='+',
                        help="overwrites for config file",
                        required=False)
    parser.add_argument(
        '--threshold',
        type=float,
        help="Minimum probability of predictions",
        required=False,
        default=0.5,
    )
    parser.add_argument('--test', action='store_true')

    args = parser.parse_args()
    model = args.model
    fold = args.fold
    task = args.task
    overwrites = args.overwrites
    test = args.test
    threshold = args.threshold

    task_name = get_task(task, name=True, models=True)
    task_dir = Path(os.getenv("det_models")) / task_name

    training_dir = get_training_dir(task_dir / model, fold)

    overwrites = overwrites if overwrites is not None else []
    overwrites.append("host.parent_data=${env:det_data}")
    overwrites.append("host.parent_results=${env:det_models}")

    prediction_dir = training_dir / "test_predictions" \
        if test else training_dir / "val_predictions"
    save_dir = training_dir / "test_predictions_nii" \
        if test else training_dir / "val_predictions_nii"
    save_dir.mkdir(exist_ok=True)

    case_ids = [
        p.stem.rsplit('_', 1)[0] for p in prediction_dir.glob("*_boxes.pkl")
    ]
    for cid in maybe_verbose_iterable(case_ids):
        res = load_pickle(prediction_dir / f"{cid}_boxes.pkl")

        instance_mask = np.zeros(res["original_size_of_raw_data"],
                                 dtype=np.uint8)

        boxes = res["pred_boxes"]
        scores = res["pred_scores"]
        labels = res["pred_labels"]

        _mask = scores >= threshold
        boxes = boxes[_mask]
        labels = labels[_mask]
        scores = scores[_mask]

        idx = np.argsort(scores)
        scores = scores[idx]
        boxes = boxes[idx]
        labels = labels[idx]

        prediction_meta = {}
        for instance_id, (pbox, pscore,
                          plabel) in enumerate(zip(boxes, scores, labels),
                                               start=1):
            mask_slicing = [
                slice(int(pbox[0]), int(pbox[2])),
                slice(int(pbox[1]), int(pbox[3])),
            ]
            if instance_mask.ndim == 3:
                mask_slicing.append(slice(int(pbox[4]), int(pbox[5])))
            instance_mask[tuple(mask_slicing)] = instance_id

            prediction_meta[int(instance_id)] = {
                "score": float(pscore),
                "label": int(plabel),
                "box": list(map(int, pbox))
            }

        logger.info(
            f"Created instance mask with {instance_mask.max()} instances.")

        instance_mask_itk = sitk.GetImageFromArray(instance_mask)
        instance_mask_itk.SetOrigin(res["itk_origin"])
        instance_mask_itk.SetDirection(res["itk_direction"])
        instance_mask_itk.SetSpacing(res["itk_spacing"])

        sitk.WriteImage(instance_mask_itk,
                        str(save_dir / f"{cid}_boxes.nii.gz"))
        save_json(prediction_meta, save_dir / f"{cid}_boxes.json")
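
The exported pair can be checked with standard tools. A small sketch that loads one case back and prints the per-instance metadata; the case id is hypothetical and the paths follow the script above.

# Sketch: inspect one exported case produced by boxes2nii.
import json

cid = "case_0"  # hypothetical case id
mask_arr = sitk.GetArrayFromImage(
    sitk.ReadImage(str(save_dir / f"{cid}_boxes.nii.gz")))
with open(save_dir / f"{cid}_boxes.json") as fp:
    meta = json.load(fp)
for instance_id, info in meta.items():
    print(instance_id, info["score"], info["label"], info["box"])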