def get_cv_fold(fold, dataset="HCP"):
    '''
    Need train-test-validate splits for best-model selection and for training
    of the combined net.
    :return:
    '''
    # For CV
    if fold == 0:
        train, validate, test = [0, 1, 2], [3], [4]
        # train, validate, test = [0, 1, 2, 3, 4], [3], [4]
    elif fold == 1:
        train, validate, test = [1, 2, 3], [4], [0]
    elif fold == 2:
        train, validate, test = [2, 3, 4], [0], [1]
    elif fold == 3:
        train, validate, test = [3, 4, 0], [1], [2]
    elif fold == 4:
        train, validate, test = [4, 0, 1], [2], [3]
    else:
        raise ValueError("Invalid fold: {}".format(fold))

    subjects = get_all_subjects(dataset)

    if dataset.startswith("HCP"):
        # subjects = list(utils.chunks(subjects[:100], 10))  # 10 folds
        subjects = list(utils.chunks(subjects, 21))  # 5 folds of 21 subjects each
        # => 5-fold CV is fine: the score is only 1 percentage point worse than
        #    with 10 folds (80 vs 60 train subjects), and 10-fold CV is impractical.
    elif dataset.startswith("Schizo"):
        # 410 subjects
        subjects = list(utils.chunks(subjects, 82))  # 5 folds of 82 subjects each
    else:
        raise ValueError("Invalid dataset name")

    subjects = np.array(subjects)
    return list(subjects[train].flatten()), list(subjects[validate].flatten()), \
           list(subjects[test].flatten())

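# Hedged usage sketch (not part of the original module): a quick sanity check of
# the fold rotation above. Assumes get_all_subjects("HCP") returns 105 subjects,
# so that 5 chunks of 21 exist; each chunk is the test set in exactly one fold.
def _demo_get_cv_fold():
    for fold in range(5):
        train, validate, test = get_cv_fold(fold, dataset="HCP")
        print("fold {}: {} train / {} validate / {} test".format(
            fold, len(train), len(validate), len(test)))  # expect 63 / 21 / 21
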
def create_one_3D_file():
    '''
    Create one big file which contains all 3D images (not slices).
    '''
    class Config:
        DATASET = "HCP"
        RESOLUTION = "1.25mm"
        FEATURES_FILENAME = "270g_125mm_peaks"
        LABELS_TYPE = np.int16
        DATASET_FOLDER = "HCP"

    data_all = []
    seg_all = []

    print("\n\nProcessing Data...")
    for s in get_all_subjects():
        print("processing data subject {}".format(s))
        data = nib.load(join(C.HOME, Config.DATASET_FOLDER, s,
                             Config.FEATURES_FILENAME + ".nii.gz")).get_data()
        data = np.nan_to_num(data)
        data = dataset_utils.scale_input_to_unet_shape(data, Config.DATASET, Config.RESOLUTION)
        data_all.append(np.array(data))
    np.save("data.npy", data_all)
    del data_all  # free memory

    print("\n\nProcessing Segs...")
    for s in get_all_subjects():
        print("processing seg subject {}".format(s))
        seg = img_utils.create_multilabel_mask(Config, s, labels_type=Config.LABELS_TYPE)
        if Config.RESOLUTION == "2.5mm":
            seg = img_utils.resize_first_three_dims(seg, order=0, zoom=0.5)
        seg = dataset_utils.scale_input_to_unet_shape(seg, Config.DATASET, Config.RESOLUTION)
        seg_all.append(np.array(seg))
    seg_all = np.array(seg_all)  # a plain list has no .dtype; stack first
    print("SEG TYPE: {}".format(seg_all.dtype))
    np.save("seg.npy", seg_all)

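# Hedged usage sketch (not part of the original module): loading the files written
# by create_one_3D_file(). Because every subject is scaled to the same U-Net shape,
# np.save stores each list as one stacked array of shape (n_subjects, x, y, z, c).
def _demo_load_one_3D_file():
    data_all = np.load("data.npy")
    seg_all = np.load("seg.npy")
    print("data: {}  seg: {}".format(data_all.shape, seg_all.shape))
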
def save_fusion_nifti_as_npy():
    # Can stay the same for both 270g and 32g.
    class Config:
        DATASET = "HCP"
        RESOLUTION = "1.25mm"
        FEATURES_FILENAME = "270g_125mm_peaks"
        LABELS_TYPE = np.int16
        LABELS_FILENAME = "bundle_masks"
        DATASET_FOLDER = "HCP"
        DIFFUSION_FOLDER = "32g_25mm"

    subjects = get_all_subjects()

    print("\n\nProcessing Data...")
    for s in subjects:
        print("processing data subject {}".format(s))
        start_time = time.time()
        data = nib.load(join(C.NETWORK_DRIVE, "HCP_fusion_" + Config.DIFFUSION_FOLDER,
                             s + "_probmap.nii.gz")).get_data()
        print("Done Loading")
        data = np.nan_to_num(data)
        data = dataset_utils.scale_input_to_unet_shape(data, Config.DATASET, Config.RESOLUTION)
        # Cut one voxel at the end, because scale_input_to_world_shape outputs 146
        # -> one too many at the end.
        data = data[:-1, :, :-1, :]
        exp_utils.make_dir(join(C.NETWORK_DRIVE, "HCP_fusion_npy_" + Config.DIFFUSION_FOLDER, s))
        np.save(join(C.NETWORK_DRIVE, "HCP_fusion_npy_" + Config.DIFFUSION_FOLDER, s,
                     Config.DIFFUSION_FOLDER + "_xyz.npy"), data)
        print("Took {}s".format(time.time() - start_time))

        print("processing seg subject {}".format(s))
        start_time = time.time()
        # seg = img_utils.create_multilabel_mask(Config, s, labels_type=Config.LABELS_TYPE)
        seg = nib.load(join(C.NETWORK_DRIVE, "HCP_for_training_COPY", s,
                            Config.LABELS_FILENAME + ".nii.gz")).get_data()
        if Config.RESOLUTION == "2.5mm":
            seg = img_utils.resize_first_three_dims(seg, order=0, zoom=0.5)
        seg = dataset_utils.scale_input_to_unet_shape(seg, Config.DATASET, Config.RESOLUTION)
        np.save(join(C.NETWORK_DRIVE, "HCP_fusion_npy_" + Config.DIFFUSION_FOLDER, s,
                     "bundle_masks.npy"), seg)
        print("Took {}s".format(time.time() - start_time))

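# Hedged sketch (not part of the original module): round-trip one of the saved .npy
# probability maps back to NIfTI for visual inspection. The folder/file names mirror
# the ones used above; the identity affine is an assumption, since the original
# affine is not stored alongside the .npy.
def _demo_npy_to_nifti(subject):
    probmap = np.load(join(C.NETWORK_DRIVE, "HCP_fusion_npy_32g_25mm", subject,
                           "32g_25mm_xyz.npy"))
    nib.save(nib.Nifti1Image(probmap, np.eye(4)), subject + "_probmap_check.nii.gz")
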
            # if idx > 0:
            # np.save(join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".npy"), data)
            nib.save(nib.Nifti1Image(data, affine),
                     join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".nii.gz"))
        else:
            print("skipping file: {}-{}".format(subject, idx))
            raise IOError("File missing")

    for filename in filenames_seg:
        img = nib.load(join(C.NETWORK_DRIVE, DATASET_FOLDER, subject, filename + ".nii.gz"))
        data = img.get_data()
        data, _, _, _ = dataset_utils.crop_to_nonzero(data, bbox=bbox)
        # np.save(join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".npy"), data)
        nib.save(nib.Nifti1Image(data, img.affine),
                 join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".nii.gz"))


if __name__ == "__main__":
    print("Output folder: {}".format(DATASET_FOLDER_PREPROC))
    subjects = get_all_subjects(dataset=dataset)
    Parallel(n_jobs=12)(delayed(create_preprocessed_files)(subject) for subject in subjects)
    # for subject in subjects:
    #     create_preprocessed_files(subject)

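# Hedged sketch (not part of the original module): the joblib pattern used in the
# __main__ block above, shown self-contained. Parallel(n_jobs=...) runs the delayed
# calls in worker processes and returns their results in input order. _square is a
# hypothetical helper used only for this demo.
def _square(x):
    return x * x

def _demo_parallel():
    from joblib import Parallel, delayed
    print(Parallel(n_jobs=4)(delayed(_square)(i) for i in range(8)))  # [0, 1, 4, ..., 49]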