def get_cv_fold(fold, dataset="HCP"):
    """Return (train, validate, test) subject-id lists for one CV fold.

    Args:
        fold: cross-validation fold index in [0, 4]. Ignored for the
            "HCP_all" and "biobank_20k" datasets, which use a fixed split.
        dataset: dataset name; controls both subject lookup and chunking.

    Returns:
        Tuple of three lists of subject ids: (train, validate, test).

    Raises:
        ValueError: if ``fold`` is not in 0..4 (for CV datasets) or the
            dataset name is not recognized.
    """
    if dataset == "HCP_all":
        # No CV here: 90/10 train/validate split plus a fixed two-subject test set.
        subjects = get_all_subjects(dataset)
        cut_point = int(len(subjects) * 0.9)
        return subjects[:cut_point], subjects[cut_point:], ["599671", "599469"]
    elif dataset == "biobank_20k":
        subjects = get_all_subjects(dataset)
        cut_point = int(len(subjects) * 0.9)
        # NOTE(review): the same subject id appears twice in the test list —
        # confirm whether this is intentional or should be two distinct ids.
        return subjects[:cut_point], subjects[cut_point:], ["1000013", "1000013"]
    else:
        # Original code silently fell through for fold outside 0..4 and later
        # crashed with UnboundLocalError; fail fast with a clear error instead.
        if fold not in (0, 1, 2, 3, 4):
            raise ValueError("Invalid fold: {}".format(fold))

        # 5-fold rotation: 3 chunks train, 1 validate, 1 test. This modular
        # form reproduces the original explicit table exactly
        # (e.g. fold 0 -> [0,1,2], [3], [4]; fold 1 -> [1,2,3], [4], [0]).
        train = [(fold + i) % 5 for i in range(3)]
        validate = [(fold + 3) % 5]
        test = [(fold + 4) % 5]

        subjects = get_all_subjects(dataset)
        if dataset.startswith("HCP"):
            # 5 folds a 21 subjects. 5-fold CV ok (score only 1%-point worse
            # than 10 folds (80 vs 60 train subjects); 10-fold CV impractical).
            subjects = list(utils.chunks(subjects, 21))
        elif dataset.startswith("Schizo"):
            # ~410 subjects -> 5 folds a 82 subjects
            subjects = list(utils.chunks(subjects, 82))
        else:
            raise ValueError("Invalid dataset name")

        subjects = np.array(subjects)
        return (list(subjects[train].flatten()),
                list(subjects[validate].flatten()),
                list(subjects[test].flatten()))
# np.save(join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".npy"), data) nib.save( nib.Nifti1Image(data, affine), join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".nii.gz")) else: print("skipping file: {}-{}".format(subject, idx)) raise IOError("File missing") for filename in filenames_seg: img = nib.load( join(C.NETWORK_DRIVE, DATASET_FOLDER, subject, filename + ".nii.gz")) data = img.get_data() data, _, _, _ = data_utils.crop_to_nonzero(data, bbox=bbox) # np.save(join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".npy"), data) nib.save( nib.Nifti1Image(data, img.affine), join(C.DATA_PATH, DATASET_FOLDER_PREPROC, subject, filename + ".nii.gz")) if __name__ == "__main__": print("Output folder: {}".format(DATASET_FOLDER_PREPROC)) subjects = get_all_subjects(dataset=dataset) Parallel(n_jobs=12)(delayed(create_preprocessed_files)(subject) for subject in subjects) # for subject in subjects: # create_preprocessed_files(subject)