def create(
    image_source: Path,
    label_source: Path,
    image_target_dir: Path,
    label_target_dir: Path,
    df: pd.DataFrame,
    fg_only: bool = False,
):
    """Copy one image/label pair into the target layout and write its instance mapping.

    Args:
        image_source: source image file; stem encodes the case id before the last '-'.
        label_source: matching instance segmentation file.
        image_target_dir: receives ``{case_id}_0000.nii.gz``.
        label_target_dir: receives ``{case_id}.nii.gz`` and ``{case_id}.json``.
        df: table with ``public_id`` plus per-instance ``label_id``/``label_code`` columns.
        fg_only: collapse all foreground instances into a single class.
    """
    for target_dir in (image_target_dir, label_target_dir):
        target_dir.mkdir(parents=True, exist_ok=True)

    # image and label file must belong to the same case
    case_id = image_source.stem.rsplit('-', 1)[0]
    case_id_check = label_source.stem.rsplit('-', 1)[0]
    assert case_id == case_id_check, f"case ids not matching, found image {case_id} and label {case_id_check}"

    instances = {}
    df_case = df.loc[df['public_id'] == case_id]
    for row in df_case.itertuples():
        _cls = int(row.label_code)
        if _cls == 0:  # background has label code 0 and lab id 0
            continue
        # fg-only collapses everything to class 1; code -1 maps to class 5
        _cls = 1 if fg_only else (5 if _cls == -1 else _cls)
        assert 0 < _cls < 6, f"Something strange happened {_cls}"
        instances[str(row.label_id)] = _cls - 1  # class range from 0 - 4 // if fg only 0

    save_json({"instances": instances}, label_target_dir / f"{case_id}.json")
    shutil.copy2(image_source, image_target_dir / f"{case_id}_0000.nii.gz")
    shutil.copy2(label_source, label_target_dir / f"{case_id}.nii.gz")
def generate_image(image_dir, label_dir, idx):
    """Generate one synthetic image/mask pair and write it as nifti plus instance json.

    The image is uniform noise with one brighter object (+0.4, clipped to [0, 1]):
    either a filled square (class 0) or a square frame (class 1). Seeding with
    ``idx`` makes each case reproducible across (multi-)processing runs.

    Relies on module-level config: ``dim``, ``image_size``, ``object_size``,
    ``object_width`` — presumably set at file scope; TODO confirm from full file.
    """
    random.seed(idx)
    np.random.seed(idx)
    logger.info(f"Generating case_{idx}")

    # random object extent and class for this case
    selected_size = np.random.randint(object_size[0], object_size[1])
    selected_class = np.random.randint(0, 2)

    data = np.random.rand(*image_size)
    mask = np.zeros_like(data)
    # random position such that the object fits fully inside the image
    top_left = [
        np.random.randint(0, image_size[i] - selected_size) for i in range(dim)
    ]
    if selected_class == 0:
        # filled square: brighten the whole region
        slicing = tuple([slice(tp, tp + selected_size) for tp in top_left])
        data[slicing] = data[slicing] + 0.4
        data = data.clip(0, 1)
        mask[slicing] = 1
    elif selected_class == 1:
        # square frame: brighten region minus an inner core
        slicing = tuple([slice(tp, tp + selected_size) for tp in top_left])
        inner_slicing = [
            slice(tp + object_width, tp + selected_size - object_width)
            for tp in top_left
        ]
        if len(inner_slicing) == 3:
            # in 3D the hole spans the full first axis
            inner_slicing[0] = slice(0, image_size[0])
        inner_slicing = tuple(inner_slicing)
        object_mask = np.zeros_like(mask).astype(bool)
        object_mask[slicing] = 1
        object_mask[inner_slicing] = 0
        data[object_mask] = data[object_mask] + 0.4
        data = data.clip(0, 1)
        mask[object_mask] = 1
    else:
        raise NotImplementedError

    if dim == 2:
        # add a leading axis so 2D cases are stored as single-slice volumes
        data = data[None]
        mask = mask[None]

    data_itk = sitk.GetImageFromArray(data)
    mask_itk = sitk.GetImageFromArray(mask)
    # single instance (id 1) labelled with the drawn class
    mask_meta = {
        "instances": {
            "1": selected_class
        },
    }
    sitk.WriteImage(data_itk, str(image_dir / f"case_{idx}_0000.nii.gz"))
    sitk.WriteImage(mask_itk, str(label_dir / f"case_{idx}.nii.gz"))
    save_json(mask_meta, label_dir / f"case_{idx}.json")
def run_prep(source_data: Path, source_label: Path, target_data_dir, target_label_dir: Path):
    """Copy one image/label pair into the raw_splitted layout and write the instance json.

    Args:
        source_data: image file named ``<case_id>_<suffix>.nii.gz``.
        source_label: instance segmentation file for the same case.
        target_data_dir: receives ``<case_id>_0000.nii.gz``.
        target_label_dir: receives ``<case_id>.nii.gz`` and ``<case_id>.json``.
    """
    case_id = f"{(source_data.stem).rsplit('_', 1)[0]}"
    shutil.copy(source_data, target_data_dir / f"{case_id}_0000.nii.gz")
    shutil.copy(source_label, target_label_dir / f"{case_id}.nii.gz")  # rename label file to match data

    label_itk = sitk.ReadImage(str(source_label))
    label_np = sitk.GetArrayFromImage(label_itk)
    # assumes instance ids are consecutive 1..max — TODO confirm for this dataset;
    # every instance maps to class 0 (single-class task); string keys for
    # consistency with the other dataset preparers in this project
    instances = {str(_id + 1): 0 for _id in range(int(label_np.max()))}
    # BUG FIX: previously written to f"{case_id}" without the ".json" suffix,
    # so downstream code looking for f"{case_id}.json" could not find it
    save_json({"instances": instances}, target_label_dir / f"{case_id}.json")
def main():
    """Prepare Task017_CADA: copy raw images/labels into the raw_splitted layout."""
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task017_CADA"

    # raw download locations; fail early if missing
    source_data_dir = task_data_dir / "raw" / "train_dataset"
    if not source_data_dir.is_dir():
        raise RuntimeError(
            f"{source_data_dir} should contain the raw data but does not exist."
        )
    source_label_dir = task_data_dir / "raw" / "train_mask_images"
    if not source_label_dir.is_dir():
        raise RuntimeError(
            f"{source_label_dir} should contain the raw labels but does not exist."
        )

    # raw splitted target dirs
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    for directory in (target_data_dir, target_label_dir):
        directory.mkdir(exist_ok=True, parents=True)

    # dataset meta information
    meta = {
        "name": "CADA",
        "task": "Task017_CADA",
        "target_class": None,
        "test_labels": False,
        "labels": {"0": "aneurysm"},
        "modalities": {"0": "CT"},
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # one case id per image file (strip the trailing "_orig")
    case_ids = [(p.stem).rsplit('_', 1)[0] for p in source_data_dir.glob("*.nii.gz")]
    print(f"Found {len(case_ids)} case ids")
    for cid in maybe_verbose_iterable(case_ids):
        run_prep(
            source_data=source_data_dir / f"{cid}_orig.nii.gz",
            source_label=source_label_dir / f"{cid}_labeledMasks.nii.gz",
            target_data_dir=target_data_dir,
            target_label_dir=target_label_dir,
        )
def convert_raw(task, overwrite, ov):
    """Create a foreground-only (single "fg" class) copy of a task's raw splitted data.

    Args:
        task: id/name of the source task.
        overwrite: if True, delete an existing target dir before converting.
        ov: optional config overrides passed to ``compose`` (may be None).

    Raises:
        FileExistsError: if the target dir exists and ``overwrite`` is False
            (``mkdir`` without ``exist_ok``).
    """
    task_name_full = get_task(task, name=True)
    task_num, task_name = task_name_full[4:].split('_', 1)
    new_task_name_full = f"Task{task_num}FG_{task_name}"

    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])
    print(cfg.pretty())

    source_splitted_dir = Path(cfg["host"]["splitted_4d_output_dir"])
    target_splitted_dir = Path(str(source_splitted_dir).replace(task_name_full, new_task_name_full))
    if target_splitted_dir.is_dir() and overwrite:
        shutil.rmtree(target_splitted_dir)
    target_splitted_dir.mkdir(parents=True)

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(target_splitted_dir.parent / "convert_cls2fg.log", level="DEBUG")

    # update dataset_info: collapse the original label map into one fg class
    # (direct assignment replaces the old value; the previous pop() was redundant)
    source_data_info = Path(cfg["host"]["data_dir"])
    data_info = load_dataset_info(source_data_info)
    data_info["labels"] = {"0": "fg"}
    data_info["task"] = new_task_name_full
    save_json(data_info, target_splitted_dir.parent / "dataset.json", indent=4)

    for postfix in ["Tr", "Ts"]:
        source_image_dir = source_splitted_dir / f"images{postfix}"
        source_label_dir = source_splitted_dir / f"labels{postfix}"
        if not source_image_dir.is_dir():
            logger.info(f"{source_image_dir} is not a dir. Skipping it.")
            continue

        # copy images and labels
        shutil.copytree(source_image_dir, target_splitted_dir / f"images{postfix}")
        shutil.copytree(source_label_dir, target_splitted_dir / f"labels{postfix}")

        # remap every instance in the properties files to the foreground class
        target_label_dir = target_splitted_dir / f"labels{postfix}"
        for props_file in target_label_dir.glob("*.json"):
            props = load_json(props_file)
            props["instances"] = {key: 0 for key in props["instances"]}
            save_json(props, props_file)
def prepare_image(
    case_id: str,
    base_dir: Path,
    mask_dir: Path,
    raw_splitted_dir: Path,
):
    """Convert one case's DICOM series + nifti mask into the raw_splitted layout.

    Writes ``imagesTr/{case_id}_0000.nii.gz``, copies the mask to
    ``labelsTr/{case_id}.nii.gz`` and saves ``labelsTr/{case_id}.json`` with
    every instance mapped to class 0 plus the original source paths.
    """
    logger.info(f"Processing {case_id}")

    # exactly one directory below the case root may contain DICOM slices
    root_data_dir = base_dir / case_id
    patient_data_dir = []
    for root, dirs, files in os.walk(root_data_dir, topdown=False):
        has_dicom = any(f.endswith(".dcm") for f in files)
        if has_dicom:
            patient_data_dir.append(Path(root))
    assert len(patient_data_dir) == 1
    patient_data_dir = patient_data_dir[0]

    # read the DICOM series into a single volume
    reader = sitk.ImageSeriesReader()
    reader.SetFileNames(reader.GetGDCMSeriesFileNames(str(patient_data_dir)))
    data_itk = reader.Execute()

    # exactly one nifti mask per case
    patient_label_dir = mask_dir / case_id
    label_candidates = [
        p for p in patient_label_dir.iterdir()
        if p.is_file() and p.name.endswith(".nii.gz")
    ]
    assert len(label_candidates) == 1
    label_path = label_candidates[0]

    mask = load_sitk_as_array(label_path)[0]
    foreground_ids = np.unique(mask)
    foreground_ids = foreground_ids[foreground_ids > 0]

    # single-class task: every instance id maps to class 0
    meta = {
        "instances": {str(int(i)): 0 for i in foreground_ids},
        "original_path_data": str(patient_data_dir),
        "original_path_label": str(label_path),
    }
    save_json(meta, raw_splitted_dir / "labelsTr" / f"{case_id}.json")
    sitk.WriteImage(
        data_itk, str(raw_splitted_dir / "imagesTr" / f"{case_id}_0000.nii.gz"))
    shutil.copy(label_path, raw_splitted_dir / "labelsTr" / f"{case_id}.nii.gz")
def main():
    """Prepare Task019FG_ADAM: foreground-vs-background aneurysm task layout."""
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task019FG_ADAM"

    # raw download location; fail early if missing
    source_data_dir = task_data_dir / "raw" / "ADAM_release_subjs"
    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")

    # raw splitted target dirs
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    for directory in (target_data_dir, target_label_dir):
        directory.mkdir(exist_ok=True, parents=True)

    # dataset meta information
    meta = {
        "name": "ADAM",
        "task": "Task019FG_ADAM",
        "target_class": None,
        "test_labels": False,
        "labels": {"0": "Aneurysm"},  # since we are running FG vs BG this is not completely correct
        "modalities": {"0": "Structured", "1": "TOF"},
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # one case per subject directory
    case_ids = [p.stem for p in source_data_dir.iterdir() if p.is_dir()]
    print(f"Found {len(case_ids)} case ids")
    for cid in maybe_verbose_iterable(case_ids):
        run_prep_fg_v_bg(
            case_id=cid,
            source_data=source_data_dir,
            target_data_dir=target_data_dir,
            target_label_dir=target_label_dir,
        )
# NOTE(review): this chunk is the tail of an enclosing main() whose header
# (and the definitions of image_paths/label_paths/task_data_dir/df/...) is
# outside the visible source — confirm against the full file.
assert len(image_paths) == len(label_paths)

# dataset meta information for the foreground-only RibFrac task
meta = {
    "name": "RibFracFG",
    "task": "Task020FG_RibFrac",
    "target_class": None,
    "test_labels": False,
    "labels": {
        "0": "fracture"
    },  # since we are running FG vs BG this is not completely correct
    "modalities": {
        "0": "CT"
    },
    "dim": 3,
}
save_json(meta, task_data_dir / "dataset.json")

# copy every image/label pair and write its (fg-only) instance mapping
for ip, lp in maybe_verbose_iterable(list(zip(image_paths, label_paths))):
    create(
        image_source=ip,
        label_source=lp,
        image_target_dir=target_data_dir,
        label_target_dir=target_label_dir,
        df=df,
        fg_only=True,
    )


if __name__ == '__main__':
    main()
def prepare_detection_label(
    case_id: str,
    label_dir: Path,
    things_classes: Sequence[int],
    stuff_classes: Sequence[int],
    min_size: float = 0,
    min_vol: float = 0,
):
    """Split a semantic segmentation into a "stuff" mask and filtered "things" instances.

    Writes, all into ``label_dir``:
      - ``{case_id}_stuff.nii.gz``: stuff classes renumbered 1..len(stuff_classes)
      - ``{case_id}.nii.gz``: connected-component instances of the things classes
        (overwrites the input segmentation file!)
      - ``{case_id}.json``: instance id -> index into ``things_classes``
      - ``{case_id}_orig.nii.gz``: the unmodified input segmentation

    Instances whose in-plane world extent is <= ``min_size`` (along the two most
    isotropic axes) or whose voxel count is <= ``min_vol`` are dropped.
    Skips the case entirely if ``{case_id}.json`` already exists.
    """
    if (label_dir / f"{case_id}.json").is_file():
        logger.info(f"Found existing case {case_id} -> skipping")
        return
    logger.info(f"Processing {case_id}")

    seg_itk = load_sitk(label_dir / f"{case_id}.nii.gz")
    # reversed so spacing matches the numpy (z, y, x) axis order
    spacing = np.asarray(seg_itk.GetSpacing())[::-1]
    seg = sitk.GetArrayFromImage(seg_itk)

    # prepare stuff information: renumber stuff classes consecutively from 1
    stuff_seg = np.zeros_like(seg)
    if stuff_classes:
        for new_class, old_class in enumerate(stuff_classes, start=1):
            stuff_seg[seg == old_class] = new_class
    stuff_seg_itk = copy_meta_data_itk(seg_itk, sitk.GetImageFromArray(stuff_seg))
    sitk.WriteImage(stuff_seg_itk, str(label_dir / f"{case_id}_stuff.nii.gz"))

    # prepare things information: connected components with full connectivity
    structure = np.ones([3] * seg.ndim)
    things_seg = np.copy(seg)
    things_seg[stuff_seg > 0] = 0  # remove all stuff classes from segmentation
    instances_not_filtered, _ = label(things_seg, structure=structure)

    final_mapping = {}
    if instances_not_filtered.max() > 0:
        boxes = get_bbox_np(instances_not_filtered[None])["boxes"]
        box_sizes = box_size_np(boxes)
        instance_ids = np.unique(instances_not_filtered)
        instance_ids = instance_ids[instance_ids > 0]
        assert len(instance_ids) == len(boxes)

        # drop the most coarsely-spaced axis; size filter applies to the others
        isotopic_axis = list(range(seg.ndim))
        isotopic_axis.pop(np.argmax(spacing))

        instances = np.zeros_like(instances_not_filtered)
        start_id = 1  # filtered instances are renumbered consecutively
        for iid, bsize in zip(instance_ids, box_sizes):
            bsize_world = bsize * spacing  # box extent in world units (mm presumably)
            instance_mask = (instances_not_filtered == iid)
            instance_vol = instance_mask.sum()

            if all(bsize_world[isotopic_axis] > min_size) and (instance_vol > min_vol):
                instances[instance_mask] = start_id
                # any single voxel of the component determines its semantic class
                single_idx = np.argwhere(instance_mask)[0]
                semantic_class = int(seg[tuple(single_idx)])
                final_mapping[start_id] = things_classes.index(semantic_class)
                start_id += 1
    else:
        instances = np.zeros_like(instances_not_filtered)

    final_instances_itk = copy_meta_data_itk(seg_itk,
                                             sitk.GetImageFromArray(instances))
    sitk.WriteImage(final_instances_itk, str(label_dir / f"{case_id}.nii.gz"))
    save_json({"instances": final_mapping}, label_dir / f"{case_id}.json")
    sitk.WriteImage(seg_itk, str(label_dir / f"{case_id}_orig.nii.gz"))
# NOTE(review): fragment of an enclosing function — `do_volume_ranking`,
# `splitted_dir` and the imports are defined outside the visible source.
if do_volume_ranking:
    # build a per-split ranking of all instances sorted ascending by voxel volume
    for postfix in ["Tr", "Ts"]:
        if (label_dir := splitted_dir / f"labels{postfix}").is_dir():
            ranking = []
            for case_id in tqdm(
                    [f.stem for f in label_dir.glob("*.json")]):
                instances = load_sitk_as_array(label_dir /
                                               f"{case_id}.nii.gz")[0]
                instance_ids, instance_counts = np.unique(
                    instances, return_counts=True)
                # one representative coordinate per instance (first voxel found);
                # [1:] skips the background id
                cps = [
                    np.argwhere(instances == iid)[0].tolist()
                    for iid in instance_ids[1:]
                ]
                assert len(instance_ids) - 1 == len(cps)
                tmp = [{
                    "case_id": str(case_id),
                    "instance_id": int(iid),
                    "vol": int(vol),
                    "cp": list(cp)[::-1]  # reversed to itk (x, y, z) order presumably — confirm
                } for iid, vol, cp in zip(instance_ids[1:],
                                          instance_counts[1:], cps)]
                ranking.extend(tmp)
            ranking = sorted(ranking, key=lambda x: x["vol"])
            save_json(ranking, splitted_dir / f"volume_ranking_{postfix}.json")
        else:
            logger.info(
                f"Did not find dir {label_dir} for volume ranking")
def main():
    """
    Generate an example dataset for nnDetection to test the installation or
    experiment with ideas.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--full',
        help="Increase size of dataset. "
             "Default sizes train/test 10/10 and full 1000/1000.",
        action='store_true',
    )
    parser.add_argument(
        '--num_processes',
        help="Use multiprocessing to create dataset.",
        type=int,
        default=0,
    )
    args = parser.parse_args()

    num_processes = args.num_processes
    num_images_tr = 1000 if args.full else 10
    num_images_ts = 1000 if args.full else 10

    meta = {
        "task": f"Task000D{dim}_Example",
        "name": "Example",
        "target_class": None,
        "test_labels": True,
        "labels": {"0": "Square", "1": "SquareHole"},
        "modalities": {"0": "MRI"},
        "dim": dim,
    }

    # setup paths
    data_task_dir = Path(os.getenv("det_data")) / meta["task"]
    data_task_dir.mkdir(parents=True, exist_ok=True)
    save_json(meta, data_task_dir / "dataset.json")

    raw_splitted_dir = data_task_dir / "raw_splitted"
    images_tr_dir = raw_splitted_dir / "imagesTr"
    labels_tr_dir = raw_splitted_dir / "labelsTr"
    images_ts_dir = raw_splitted_dir / "imagesTs"
    labels_ts_dir = raw_splitted_dir / "labelsTs"
    for directory in (images_tr_dir, labels_tr_dir, images_ts_dir, labels_ts_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # train cases are seeded 0..num_images_tr-1; test cases continue the range
    jobs = (
        (images_tr_dir, labels_tr_dir, range(num_images_tr)),
        (images_ts_dir, labels_ts_dir,
         range(num_images_tr, num_images_tr + num_images_ts)),
    )
    if num_processes == 0:
        for image_dir, label_dir, indices in jobs:
            for idx in indices:
                generate_image(image_dir, label_dir, idx)
    else:
        logger.info("Using multiprocessing to create example dataset.")
        for image_dir, label_dir, indices in jobs:
            with Pool(processes=num_processes) as p:
                p.starmap(
                    generate_image,
                    zip(repeat(image_dir), repeat(label_dir), indices),
                )
def prepare_case(case_id,
                 data_dirs,
                 ktrans_dirs,
                 t2_masks,
                 df_labels,
                 df_masks,
                 data_target,
                 label_target,
                 ):
    """Prepare one ProstateX case: load T2/ADC/PD-W/Ktrans, resample to T2, build instance mask.

    Writes ``{case_id}_0000..0003.nii.gz`` (T2, ADC, PD-W, Ktrans) to
    ``data_target`` and ``{case_id}.nii.gz`` + ``{case_id}.json`` to
    ``label_target``. Any failure is logged, not raised (best-effort batch run).
    """
    try:
        logger.info(f"Preparing {case_id}")
        # exactly one study directory per case
        tmp_dir = data_dirs / case_id
        _dirs = [f for f in tmp_dir.iterdir() if f.is_dir()]
        assert len(_dirs) == 1
        data_dir = tmp_dir / _dirs[0]

        # series ids are encoded in the mask file names listed in the csv
        df_mask_case = df_masks[df_masks['T2'].str.contains(case_id)]
        assert len(df_mask_case) == 1
        t2_mask_file = df_mask_case.iloc[0]["T2"]
        assert f"{case_id}" in t2_mask_file
        t2_series_id = int(t2_mask_file.rsplit(".", 2)[0].rsplit('_', 1)[1])

        adc_mask_file = df_mask_case.iloc[0]["ADC"]
        assert f"{case_id}" in adc_mask_file
        if case_id == "ProstateX-0025":
            # case 0025 has a 7a inside the table
            adc_series_id = 7
            assert adc_mask_file.endswith("7a.nii.gz")
        elif case_id == "ProstateX-0113":
            # even though the table shows 9 as the series
            # ID we use 10 because 9 is not an ADC file?
            adc_series_id = int(adc_mask_file.rsplit(".", 2)[0].rsplit('_', 1)[1])
            assert adc_series_id == 9
            adc_series_id = 10
        else:
            adc_series_id = int(adc_mask_file.rsplit(".", 2)[0].rsplit('_', 1)[1])

        # T2
        t2_dir = [f for f in data_dir.glob("*t2*")
                  if f.name.startswith(f"{t2_series_id}.")]
        assert len(t2_dir) == 1
        t2_data_itk = load_dicom_series_sitk(t2_dir[0])

        # ADC
        adc_dir = [f for f in data_dir.glob("*ADC*")
                   if f.name.startswith(f"{adc_series_id}.")]
        assert len(adc_dir) == 1
        adc_data_itk = load_dicom_series_sitk(adc_dir[0])

        # PD-W: take the last matching series when sorted by name
        pdw_dir = sorted(data_dir.glob("* PD *"))[-1]
        pdw_data_itk = load_dicom_series_sitk(pdw_dir)

        # k-trans
        ktrans_dir = ktrans_dirs / case_id
        ktrans_data_itk = load_sitk(ktrans_dir / f"{case_id}-Ktrans.mhd")

        # resample data to t2 (only early fusion is currently supported)
        resampler = sitk.ResampleImageFilter()  # default linear
        resampler.SetReferenceImage(t2_data_itk)
        adc_data_itk_res = resampler.Execute(adc_data_itk)
        pdw_data_itk_res = resampler.Execute(pdw_data_itk)
        ktrans_data_itk_res = resampler.Execute(ktrans_data_itk)

        # prepare mask: one instance id per finding mask file;
        # finding number is parsed from the "Finding<N>" token in the file name
        mask_paths = list(t2_masks.glob(f"{case_id}*"))
        fids = [int([l
                     for l in mp.name.split("-") if "Finding" in l][0][7:])
                for mp in mask_paths]
        mask_itk = load_sitk(str(mask_paths[0]))
        mask = sitk.GetArrayFromImage(mask_itk)
        mask[mask > 0] = 1
        # NOTE(review): instance id i corresponds to mask_paths[i-1]; later
        # findings overwrite earlier ones where masks overlap — confirm intended
        for idx, mp in enumerate(mask_paths[1:], start=2):
            _mask = load_sitk_as_array(str(mp))[0]
            mask[_mask > 0] = idx
        mask_final = sitk.GetImageFromArray(mask)
        copy_meta_data_itk(t2_data_itk, mask_final)

        # instance id -> clinical significance (ClinSig) from the findings csv
        df_case = df_labels.loc[df_labels['ProxID'] == case_id]
        instances = {}
        for row in df_case.itertuples():
            if row.fid in fids:
                instances[fids.index(int(row.fid)) + 1] = int(row.ClinSig)
            else:
                logger.info(f"Found removed fid {row.fid} in {case_id}")

        # save
        sitk.WriteImage(t2_data_itk, str(data_target / f"{case_id}_0000.nii.gz"))
        sitk.WriteImage(adc_data_itk_res, str(data_target / f"{case_id}_0001.nii.gz"))
        sitk.WriteImage(pdw_data_itk_res, str(data_target / f"{case_id}_0002.nii.gz"))
        sitk.WriteImage(ktrans_data_itk_res, str(data_target / f"{case_id}_0003.nii.gz"))

        sitk.WriteImage(mask_final, str(label_target / f"{case_id}.nii.gz"))
        save_json({"instances": instances}, label_target / f"{case_id}.json")
    except Exception as e:
        # best-effort batch processing: log and continue with the next case
        logger.error(f"Case {case_id} failed with {e} and {traceback.format_exc()}")
def main():
    """
    Does not use the KTrans Sequence of ProstateX
    This script only uses the provided T2 masks
    """
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task021_ProstateX"

    # setup raw paths; fail early if the download is missing
    source_data_dir = task_data_dir / "raw"
    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")

    source_data = source_data_dir / "PROSTATEx"
    source_masks = source_data_dir / "rcuocolo-PROSTATEx_masks-e344452"
    source_ktrans = source_data_dir / "ktrains"
    csv_labels = source_data_dir / "ProstateX-TrainingLesionInformationv2" / "ProstateX-Findings-Train.csv"
    csv_masks = source_data_dir / "rcuocolo-PROSTATEx_masks-e344452" / "Files" / "Image_list.csv"

    data_target = task_data_dir / "raw_splitted" / "imagesTr"
    data_target.mkdir(parents=True, exist_ok=True)
    label_target = task_data_dir / "raw_splitted" / "labelsTr"
    label_target.mkdir(parents=True, exist_ok=True)

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    logger.add(data_target.parent.parent / "prepare.log", level="DEBUG")

    base_masks = source_masks / "Files" / "Masks"
    t2_masks = base_masks / "T2"

    df_labels = pd.read_csv(csv_labels)
    df_masks = pd.read_csv(csv_masks)

    # derive unique case ids ("ProstateX-NNNN") from the T2 mask file names
    case_ids = [f.stem.split("-", 2)[:2] for f in t2_masks.glob("*nii.gz")]
    case_ids = list(set([f"{c[0]}-{c[1]}" for c in case_ids]))
    logger.info(f"Found {len(case_ids)} cases")

    # save meta
    logger.info("Saving dataset info")
    dataset_info = {
        "name": "ProstateX",
        "task": "Task021_ProstateX",
        "target_class": None,
        "test_labels": False,
        "labels": {
            "0": "clinically_significant",
            "1": "clinically_insignificant",
        },
        "modalities": {
            "0": "T2",
            "1": "ADC",
            "2": "PD-W",
            "3": "Ktrans"
        },
        "dim": 3,
        "info": "Ground Truth: T2 Masks; \n"
                "Modalities: T2, ADC, PD-W, Ktrans \n;"
                "Classes: clinically significant = 1, insignificant = 0 \n"
                "Keep: ProstateX-0025 '10-28-2011-MR prostaat kanker detectie WDSmc MCAPRODETW-19047'\n"
                "Masks\n"
                "https://github.com/rcuocolo/PROSTATEx_masks\n"
                "Github hash: e3444521e70cd5e8d405f4e9a6bc08312df8afe7"
    }
    save_json(dataset_info, task_data_dir / "dataset.json")

    # prepare labels and data (sequentially; the pooled variant is kept below)
    for cid in maybe_verbose_iterable(case_ids):
        prepare_case(cid,
                     data_dirs=source_data,
                     ktrans_dirs=source_ktrans,
                     t2_masks=t2_masks,
                     df_labels=df_labels,
                     df_masks=df_masks,
                     data_target=data_target,
                     label_target=label_target,
                     )

    # with Pool(processes=6) as p:
    #     p.starmap(prepare_case, zip(case_ids,
    #                                 repeat(source_data),
    #                                 repeat(source_ktrans),
    #                                 repeat(t2_masks),
    #                                 repeat(df_labels),
    #                                 repeat(df_masks),
    #                                 repeat(data_target),
    #                                 repeat(label_target),
    #                                 ))

    # create test split
    create_test_split(task_data_dir / "raw_splitted",
                      num_modalities=len(dataset_info["modalities"]),
                      test_size=0.3,
                      random_state=0,
                      shuffle=True,
                      )
def main():
    """Prepare Task011_Kits: kidney as stuff class, tumor as things class."""
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task011_Kits"

    source_data_dir = task_data_dir / "raw"
    if not source_data_dir.is_dir():
        raise RuntimeError(
            f"{source_data_dir} should contain the raw data but does not exist."
        )

    splitted_dir = task_data_dir / "raw_splitted"
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    for directory in (target_data_dir, target_label_dir):
        directory.mkdir(exist_ok=True, parents=True)

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(task_data_dir / "prepare.log", level="DEBUG")

    # save meta info
    dataset_info = {
        "name": "Kits",
        "task": "Task011_Kits",
        "target_class": None,
        "test_labels": True,
        "seg2det_stuff": [1, ],  # define stuff classes: kidney
        "seg2det_things": [2, ],  # define things classes: tumor
        "min_size": 3.,
        "labels": {"0": "lesion"},
        "labels_stuff": {"1": "kidney"},
        "modalities": {"0": "CT"},
        "dim": 3,
    }
    save_json(dataset_info, task_data_dir / "dataset.json")

    # prepare cases; ids >= 210 are skipped (presumably unlabeled — confirm)
    cases = [str(c.name) for c in source_data_dir.iterdir() if c.is_dir()]
    for c in maybe_verbose_iterable(cases):
        logger.info(f"Copy case {c}")
        if int(c.split("_")[-1]) >= 210:
            continue
        shutil.copy(source_data_dir / c / "imaging.nii.gz",
                    target_data_dir / f"{c}_0000.nii.gz")
        shutil.copy(source_data_dir / c / "segmentation.nii.gz",
                    target_label_dir / f"{c}.nii.gz")

    # create an artificial test split
    create_test_split(
        splitted_dir=splitted_dir,
        num_modalities=1,
        test_size=0.3,
        random_state=0,
        shuffle=True,
    )
def main():
    """Prepare Task025_LymphNodes from the TCIA CT lymph node collection."""
    det_data_dir = Path(os.getenv("det_data"))
    task_data_dir = det_data_dir / "Task025_LymphNodes"

    source_data_base = task_data_dir / "raw"
    if not source_data_base.is_dir():
        raise RuntimeError(
            f"{source_data_base} should contain the raw data but does not exist."
        )

    raw_splitted_dir = task_data_dir / "raw_splitted"
    for subdir in ("imagesTr", "labelsTr", "imagesTs", "labelsTs"):
        (raw_splitted_dir / subdir).mkdir(parents=True, exist_ok=True)

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="DEBUG")
    logger.add(raw_splitted_dir.parent / "prepare.log", level="DEBUG")

    # dataset meta information
    meta = {
        "name": "Lymph Node TCIA",
        "task": "Task025_LymphNodes",
        "target_class": None,
        "test_labels": True,
        "labels": {"0": "LymphNode", },
        "modalities": {"0": "CT", },
        "dim": 3,
    }
    save_json(meta, raw_splitted_dir.parent / "dataset.json")

    base_dir = source_data_base / "CT Lymph Nodes"
    mask_dir = source_data_base / "MED_ABD_LYMPH_MASKS"

    case_ids = sorted([p.name for p in base_dir.iterdir() if p.is_dir()])
    logger.info(f"Found {len(case_ids)} cases in {base_dir}")
    for cid in maybe_verbose_iterable(case_ids):
        prepare_image(
            case_id=cid,
            base_dir=base_dir,
            mask_dir=mask_dir,
            raw_splitted_dir=raw_splitted_dir,
        )

    create_test_split(
        raw_splitted_dir,
        num_modalities=len(meta["modalities"]),
        test_size=0.3,
        random_state=0,
        shuffle=True,
    )
def boxes2nii():
    """CLI: rasterize predicted boxes into a nifti instance mask plus a json
    with score/label/box per kept instance (one file pair per case)."""
    import os
    import argparse
    from pathlib import Path

    import numpy as np
    import SimpleITK as sitk
    from loguru import logger

    from nndet.io import save_json, load_pickle
    from nndet.io.paths import get_task, get_training_dir
    from nndet.utils.info import maybe_verbose_iterable

    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str,
                        help="Task id e.g. Task12_LIDC OR 12 OR LIDC")
    parser.add_argument('model', type=str, help="model name, e.g. RetinaUNetV0")
    parser.add_argument('-f', '--fold', type=int, help="fold to sweep.",
                        default=0, required=False)
    parser.add_argument('-o', '--overwrites', type=str, nargs='+',
                        help="overwrites for config file", required=False)
    parser.add_argument(
        '--threshold',
        type=float,
        help="Minimum probability of predictions",
        required=False,
        default=0.5,
    )
    parser.add_argument('--test', action='store_true')
    args = parser.parse_args()

    model = args.model
    fold = args.fold
    task = args.task
    overwrites = args.overwrites
    test = args.test
    threshold = args.threshold

    task_name = get_task(task, name=True, models=True)
    task_dir = Path(os.getenv("det_models")) / task_name
    training_dir = get_training_dir(task_dir / model, fold)

    # NOTE(review): overwrites are appended but not passed on below — confirm
    # whether a downstream config compose consumes them in the full file
    overwrites = overwrites if overwrites is not None else []
    overwrites.append("host.parent_data=${env:det_data}")
    overwrites.append("host.parent_results=${env:det_models}")

    prediction_dir = training_dir / "test_predictions" \
        if test else training_dir / "val_predictions"
    save_dir = training_dir / "test_predictions_nii" \
        if test else training_dir / "val_predictions_nii"
    save_dir.mkdir(exist_ok=True)

    case_ids = [
        p.stem.rsplit('_', 1)[0] for p in prediction_dir.glob("*_boxes.pkl")
    ]
    for cid in maybe_verbose_iterable(case_ids):
        res = load_pickle(prediction_dir / f"{cid}_boxes.pkl")
        instance_mask = np.zeros(res["original_size_of_raw_data"], dtype=np.uint8)

        boxes = res["pred_boxes"]
        scores = res["pred_scores"]
        labels = res["pred_labels"]

        # keep only predictions at or above the probability threshold
        _mask = scores >= threshold
        boxes = boxes[_mask]
        labels = labels[_mask]
        scores = scores[_mask]

        # sort ascending by score so higher-scoring boxes are drawn last and
        # overwrite lower-scoring ones where boxes overlap
        idx = np.argsort(scores)
        scores = scores[idx]
        boxes = boxes[idx]
        labels = labels[idx]

        prediction_meta = {}
        for instance_id, (pbox, pscore, plabel) in enumerate(
                zip(boxes, scores, labels), start=1):
            # box layout: (y1, x1, y2, x2[, z1, z2]) presumably — confirm
            mask_slicing = [
                slice(int(pbox[0]), int(pbox[2])),
                slice(int(pbox[1]), int(pbox[3])),
            ]
            if instance_mask.ndim == 3:
                mask_slicing.append(slice(int(pbox[4]), int(pbox[5])))
            instance_mask[tuple(mask_slicing)] = instance_id

            prediction_meta[int(instance_id)] = {
                "score": float(pscore),
                "label": int(plabel),
                "box": list(map(int, pbox))
            }
        logger.info(
            f"Created instance mask with {instance_mask.max()} instances.")

        # restore the original image geometry before saving
        instance_mask_itk = sitk.GetImageFromArray(instance_mask)
        instance_mask_itk.SetOrigin(res["itk_origin"])
        instance_mask_itk.SetDirection(res["itk_direction"])
        instance_mask_itk.SetSpacing(res["itk_spacing"])

        sitk.WriteImage(instance_mask_itk, str(save_dir / f"{cid}_boxes.nii.gz"))
        save_json(prediction_meta, save_dir / f"{cid}_boxes.json")