import glob
import logging
import os

import numpy as np
from skimage.transform import rescale
from tqdm import tqdm

import utils


def prepare_data(input_folder, preproc_folder, idx_start, idx_end, bias_correction):

    images = []
    affines = []
    patnames = []
    masks = []

    # read the folder names (one sub-folder per subject)
    foldernames = sorted(glob.glob(input_folder + '*/'))
    logging.info('Number of images in the dataset: %s' % str(len(foldernames)))

    # iterate through all indices
    for idx in range(len(foldernames)):

        # only consider images within the requested indices
        if (idx < idx_start) or (idx >= idx_end):
            logging.info('skipping subject: %d' % idx)
            continue

        # get the folder name for this subject
        foldername = foldernames[idx]

        # extract the patient name
        _patname = foldername[foldername[:-1].rfind('/') + 1:-1]
        if _patname == 'A00033264':  # this subject has images of a different size
            continue

        # ====================================================
        # read the segmentation mask (~100 classes) and group its classes as required
        # ====================================================
        name = foldername + 'orig_labels_aligned_with_true_image.nii.gz'
        logging.info('==============================================')
        logging.info('reading segmentation mask: %s' % name)
        _seg_data, _seg_affine, _seg_header = utils.load_nii(name)
        _seg_data = utils.group_segmentation_classes(_seg_data)

        # ====================================================
        # read the image file (bias-corrected with N4 beforehand, if requested)
        # ====================================================
        if bias_correction:
            name = foldername + 'MPRAGE_n4.nii'
        else:
            name = foldername + 'MPRAGE.nii'
        logging.info('reading image: %s' % name)
        _img_data, _img_affine, _img_header = utils.load_nii(name)
        # _img_header.get_zooms() = (1.0, 1.0, 1.0)

        # ============
        # create a binary brain mask from the segmentation and use it
        # to get rid of the skull in the image
        # ============
        seg_mask = np.copy(_seg_data)
        seg_mask[_seg_data > 0] = 1
        img_masked = _img_data * seg_mask

        # normalise the image
        _img_data = utils.normalise_image(img_masked, norm_type='div_by_max')

        # ============
        # rescale the image and the segmentation mask so that their pixel size
        # in mm matches that of the HCP images
        # (note: multichannel was replaced by channel_axis in scikit-image >= 0.19)
        # ============
        img_rescaled = rescale(image=_img_data, scale=10 / 7, order=1,
                               preserve_range=True, multichannel=False)
        seg_rescaled = rescale(image=_seg_data, scale=10 / 7, order=0,
                               preserve_range=True, multichannel=False)

        # ============
        # much of the periphery is just zeros, so crop some of it away
        # ============
        # original images are 176 x 256 x 256;
        # rescaling makes them 251 x 366 x 366;
        # cropping brings them down to 224 x 256 x 224
        x_start, x_end = 13, -14
        y_start, y_end = 55, -55
        z_start, z_end = 55 + 16 + 50, -55 - 16 + 50
        img_rescaled = img_rescaled[x_start:x_end, y_start:y_end, z_start:z_end]
        seg_rescaled = seg_rescaled[x_start:x_end, y_start:y_end, z_start:z_end]

        # save the pre-processed segmentation ground truth
        utils.makefolder(preproc_folder + _patname)
        utils.save_nii(preproc_folder + _patname + '/preprocessed_gt15.nii',
                       seg_rescaled, _seg_affine)
        if bias_correction:
            utils.save_nii(preproc_folder + _patname + '/preprocessed_image_n4.nii',
                           img_rescaled, _img_affine)
        else:
            utils.save_nii(preproc_folder + _patname + '/preprocessed_image.nii',
                           img_rescaled, _img_affine)

        # append to lists
        images.append(img_rescaled)
        affines.append(_img_affine)
        patnames.append(_patname)
        masks.append(seg_rescaled)

    # convert the lists to arrays
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed images and masks so that they can be read directly the next time;
    # name the files according to the requested subject indices
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)
    if bias_correction:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'
    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
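
# Example usage of prepare_data: a minimal sketch, assuming a dataset layout with
# one sub-folder per subject under input_folder. The paths and indices below are
# hypothetical placeholders, not part of the original pipeline.
def example_prepare_data():
    images, masks, affines, patnames = prepare_data(
        input_folder='/data/raw/',        # hypothetical input path
        preproc_folder='/data/preproc/',  # hypothetical output path
        idx_start=0,                      # first subject index to include
        idx_end=10,                       # one past the last subject index
        bias_correction=False)            # use MPRAGE.nii rather than MPRAGE_n4.nii
    # after the y-axis merge, images and masks are stacks of 2d x-z slices
    # of shape (n_subjects * 256, 224, 224)
    print(images.shape, masks.shape)
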
def main():

    # gather all patients from the input directories
    # (note: `directories`, `opt` and `normalize` are assumed to be defined at
    # module level, e.g. via argparse options)
    patients = []
    for data_dir in directories:
        for patient in os.listdir(data_dir):
            if not patient.startswith('.'):
                patients.append({'patient': patient,
                                 'path': os.path.join(data_dir, patient)})
    print('{} patients found'.format(len(patients)))

    # shuffle and split into training and validation sets
    np.random.shuffle(patients)
    split_index = int(opt.split * len(patients))
    training_set = patients[:split_index]
    validation_set = patients[split_index:]
    print('Training set: {} patients'.format(len(training_set)))
    print('Validation set: {} patients'.format(len(validation_set)))

    # pre-process both splits with the same pipeline
    for split_name, split in [('train', training_set), ('val', validation_set)]:
        with tqdm(enumerate(split), total=len(split)) as pbar:  # progress bar
            for i, item in pbar:
                for root, dirs, files in os.walk(item['path']):
                    for file in files:
                        if file.endswith('.nii.gz'):
                            full_file_path = os.path.join(root, file)
                            output_path = os.path.join(opt.output_dir, split_name,
                                                       item['patient'],
                                                       file.replace('.nii.gz', '.nii'))

                            # create the output directory if it does not exist yet
                            directory = os.path.dirname(output_path)
                            if not os.path.exists(directory):
                                os.makedirs(directory)

                            # read the NIfTI file
                            data, affine = utils.read_nii(full_file_path)

                            # normalize all volumes except the segmentation
                            if not file.endswith('seg.nii.gz'):
                                data, stats = normalize(data)

                            # transpose so that the slice axis comes first
                            data = data.transpose(2, 0, 1)

                            # save to file
                            utils.save_nii(data, output_path)
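
# main() relies on a module-level normalize() helper that is not shown above. A
# minimal sketch is given below, assuming zero-mean / unit-variance normalisation
# computed over the foreground (non-zero) voxels and a (mean, std) tuple returned
# as `stats`; the actual implementation in the repository may differ.
def normalize(data, eps=1e-8):
    # compute statistics over the non-zero voxels only, so that the empty
    # background does not dominate the mean and standard deviation
    foreground = data[data > 0]
    mean = foreground.mean() if foreground.size > 0 else 0.0
    std = foreground.std() if foreground.size > 0 else 1.0
    # shift and scale the whole volume; eps guards against division by zero
    normalized = (data - mean) / (std + eps)
    return normalized, (mean, std)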