def load_without_size_preprocessing(input_folder, site_name, idx, depth):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + site_name + '/*/'))

    # ==================
    # get file paths
    # ==================
    patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx])

    # ============
    # read the image and normalize it to be between 0 and 1
    # ============
    image, _, image_hdr = utils.load_nii(image_path)
    # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    image = np.swapaxes(image, 1, 2)

    # ==================
    # read the label file
    # ==================
    label, _, _ = utils.load_nii(label_path)
    # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    label = np.swapaxes(label, 1, 2)
    # group the segmentation classes as required
    label = utils.group_segmentation_classes(label)

    # ============
    # create a segmentation mask and use it to get rid of the skull in the image
    # ============
    label_mask = np.copy(label)
    label_mask[label > 0] = 1
    image = image * label_mask

    # ==================
    # crop out some portion of the image, which is all zeros (rough registration via visual inspection)
    # ==================
    if site_name == 'CALTECH':
        image = image[:, 80:, :]
        label = label[:, 80:, :]
    elif site_name == 'STANFORD':
        image, label = center_image_and_label(image, label)

    # ==================
    # crop volume along z axis (as there are several zeros towards the ends)
    # ==================
    image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
    label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

    # ==================
    # normalize the image
    # ==================
    image = utils.normalise_image(image, norm_type='div_by_max')

    return image, label
def load_without_size_preprocessing(input_folder, idx, protocol, preprocessing_folder, depth):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))

    # ==================
    # get file paths
    # ==================
    patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                     protocol,
                                                                     preprocessing_folder)

    # ============
    # read the image and normalize it to be between 0 and 1
    # ============
    image, _, image_hdr = utils.load_nii(image_path)
    # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    image = np.swapaxes(image, 1, 2)
    image = utils.normalise_image(image, norm_type='div_by_max')

    # ==================
    # read the label file
    # ==================
    label, _, _ = utils.load_nii(label_path)
    # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    label = np.swapaxes(label, 1, 2)
    # group the segmentation classes as required
    label = utils.group_segmentation_classes(label)

    # ==================
    # crop volume along z axis (as there are several zeros towards the ends)
    # ==================
    image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
    label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

    return image, label
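# ======================================================================
# Usage sketch (not from the original source): loading a single subject with
# the zip-based loader above. The folder paths, protocol string and depth are
# hypothetical placeholders; their exact form depends on how
# get_image_and_label_paths resolves files for this dataset.
# ======================================================================
def _example_load_one_subject():
    image, label = load_without_size_preprocessing(input_folder='/data/hcp/',
                                                   idx=0,
                                                   protocol='T1',
                                                   preprocessing_folder='/data/hcp_preproc/',
                                                   depth=256)
    # image and label share one shape; axis 2 has been cropped/padded to depth
    assert image.shape == label.shape and image.shape[2] == 256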
def prepare_data(input_folder, output_file, idx_start, idx_end, protocol, size, target_resolution, preprocessing_folder):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a hdf5 file
    # =======================
    # hdf5_file = h5py.File(output_file, "w")
    #
    # # ===============================
    # # Create datasets for images and labels
    # # ===============================
    # data = {}
    # num_subjects = idx_end - idx_start
    #
    # data['images'] = hdf5_file.create_dataset("images", [num_subjects] + list(size), dtype=np.float32)
    # data['labels'] = hdf5_file.create_dataset("labels", [num_subjects] + list(size), dtype=np.uint8)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initiate counter
    # ===============================
    patient_counter = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        logging.info('Volume {} of {}...'.format(idx, idx_end))

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)

        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        label = utils.group_segmentation_classes(label)  # group the segmentation classes as required

        # # ==================
        # # collect some header info.
        # # ==================
        # px_list.append(float(image_hdr.get_zooms()[0]))
        # py_list.append(float(image_hdr.get_zooms()[1]))
        # pz_list.append(float(image_hdr.get_zooms()[2]))
        # nx_list.append(image.shape[0])
        # ny_list.append(image.shape[1])
        # nz_list.append(image.shape[2])
        # pat_names_list.append(patient_name)

        # ==================
        # crop volume along all axes from the ends (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_x(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_x(label, 256)
        image = utils.crop_or_pad_volume_to_size_along_y(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_y(label, 256)
        image = utils.crop_or_pad_volume_to_size_along_z(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_z(label, 256)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        # rescale, crop / pad to make all images of the required size and resolution
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[1] / target_resolution[1],
                        image_hdr.get_zooms()[2] / target_resolution[2]]

        image_rescaled = transform.rescale(image_normalized,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode='constant')

        # label_onehot = utils.make_onehot(label, nlabels=15)
        #
        # label_onehot_rescaled = transform.rescale(label_onehot,
        #                                           scale_vector,
        #                                           order=1,
        #                                           preserve_range=True,
        #                                           multichannel=True,
        #                                           mode='constant')
        #
        # label_rescaled = np.argmax(label_onehot_rescaled, axis=-1)
        #
        # # ============
        # # the images and labels have been rescaled to the desired resolution.
        # # write them to the hdf5 file now.
        # # ============
        # image_list.append(image_rescaled)
        # label_list.append(label_rescaled)

        # ============
        # write each coronal slice of the rescaled volume to file as a jpeg image
        # ============
        volume_dir = os.path.join(preprocessing_folder, 'volume_{:06d}'.format(idx))
        os.makedirs(volume_dir, exist_ok=True)
        for i in range(size[1]):
            slice_path = os.path.join(volume_dir, 'slice_{:06d}.jpeg'.format(i))
            slice_2d = image_rescaled[:, i, :] * 255
            Image.fromarray(slice_2d.astype(np.uint8)).save(slice_path)
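# ======================================================================
# Usage sketch (not from the original source): reassembling one volume from the
# per-slice jpegs written above. The directory layout and the 255 scaling
# mirror the save loop in prepare_data; the preprocessing folder path is a
# hypothetical placeholder, and jpeg compression makes the round trip lossy.
# ======================================================================
def _example_read_jpeg_volume(preprocessing_folder='/data/preproc/', idx=0):
    import os
    import numpy as np
    from PIL import Image
    volume_dir = os.path.join(preprocessing_folder, 'volume_{:06d}'.format(idx))
    # zero-padded filenames make lexicographic order equal slice order
    slice_names = sorted(os.listdir(volume_dir))
    slices = [np.asarray(Image.open(os.path.join(volume_dir, n)), dtype=np.float32) / 255.0
              for n in slice_names]
    return np.stack(slices, axis=1)  # back to [x, coronal slice index, z]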
def prepare_data(input_folder, preproc_folder, protocol, idx_start, idx_end):

    images = []
    affines = []
    patnames = []
    masks = []

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # ========================
    # iterate through the requested indices
    # ========================
    for idx in range(idx_start, idx_end):

        logging.info('============================================================')

        # ========================
        # get the file name for this subject
        # ========================
        filename = filenames[idx]

        # ========================
        # define how much of the image can be cropped out as it consists of zeros
        # ========================
        x_start = 18
        x_end = -18
        y_start = 28
        y_end = -27
        z_start = 2
        z_end = -34
        # original images are 260 * 311 * 260
        # cropping them down to 224 * 256 * 224

        # ========================
        # read the contents inside the top-level subject directory
        # ========================
        with zipfile.ZipFile(filename, 'r') as zfile:

            # ========================
            # search for the relevant files
            # ========================
            for name in zfile.namelist():

                # ========================
                # search for files inside the T1w directory
                # ========================
                if re.search(r'\/T1w/', name) is not None:

                    # ========================
                    # search for .gz files inside the T1w directory
                    # ========================
                    if re.search(r'\.gz$', name) is not None:

                        # ========================
                        # get the protocol image
                        # ========================
                        if re.search(protocol + 'acpc_dc_restore_brain', name) is not None:

                            logging.info('reading image: %s' % name)

                            # extract the image filepath
                            _filepath = zfile.extract(name, sys_config.preproc_folder_hcp)
                            # extract the patient name
                            _patname = name[:name.find('/')]
                            # read the 3d image
                            _img_data, _img_affine, _img_header = utils.load_nii(_filepath)
                            # discard some pixels as they are always zero
                            _img_data = _img_data[x_start:x_end, y_start:y_end, z_start:z_end]
                            # normalise the image (volume wise)
                            _img_data = utils.normalise_image(_img_data, norm_type='div_by_max')

                            # save the pre-processed image
                            savepath = sys_config.preproc_folder_hcp + _patname + '/preprocessed_image' + protocol + '.nii'
                            utils.save_nii(savepath, _img_data, _img_affine, _img_header)

                            # append to the list of all images, affines and patient names
                            images.append(_img_data)
                            affines.append(_img_affine)
                            patnames.append(_patname)

                        # ========================
                        # get the segmentation mask
                        # ========================
                        if re.search('aparc.aseg', name) is not None:  # segmentation mask with ~100 classes

                            if re.search('T1wDividedByT2w_', name) is None:

                                logging.info('reading mask: %s' % name)

                                # extract the segmentation mask
                                _segpath = zfile.extract(name, sys_config.preproc_folder_hcp)
                                # extract the patient name
                                _patname = name[:name.find('/')]
                                # read the segmentation mask
                                _seg_data, _seg_affine, _seg_header = utils.load_nii(_segpath)
                                # discard some pixels as they are always zero
                                _seg_data = _seg_data[x_start:x_end, y_start:y_end, z_start:z_end]
                                # group the segmentation classes as required
                                _seg_data = utils.group_segmentation_classes(_seg_data)

                                # save the pre-processed segmentation ground truth
                                savepath = sys_config.preproc_folder_hcp + _patname + '/preprocessed_gt15.nii'
                                utils.save_nii(savepath, _seg_data, _seg_affine, _seg_header)

                                # append to the list of all masks
                                masks.append(_seg_data)

    # ========================
    # convert the lists to arrays
    # ========================
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # ========================
    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    # ========================
    logging.info('Saving pre-processed files...')
    config_details = '%sfrom%dto%d_' % (protocol, idx_start, idx_end)
    filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
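# ======================================================================
# Shape note (a sketch, not from the original source): the swapaxes/reshape
# pair above turns a batch of volumes [N, nx, ny, nz] into a flat stack of
# x-z slices. A tiny synthetic example verifies the resulting layout.
# ======================================================================
def _example_volumes_to_slices():
    import numpy as np
    vols = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)  # [N, nx, ny, nz]
    slices = vols.swapaxes(1, 2).reshape(-1, vols.shape[1], vols.shape[3])
    assert slices.shape == (2 * 4, 3, 5)  # N * ny slices, each nx * nz
    # slice k belongs to subject k // ny, coronal index k % ny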
def prepare_data(input_folder, output_file, idx_start, idx_end, protocol, size, depth, target_resolution, preprocessing_folder):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(filenames, idx_start, idx_end, protocol, preprocessing_folder, depth)

    # ===============================
    # the sizes of the image and label arrays are set as:
    # [(number of coronal slices per subject * number of subjects), size of coronal slices]
    # ===============================
    data['images'] = hdf5_file.create_dataset("images", [num_slices] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_slices] + list(size), dtype=np.uint8)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initialize counters
    # ===============================
    write_buffer = 0
    counter_from = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)

        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        image = np.swapaxes(image, 1, 2)

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = np.swapaxes(label, 1, 2)
        label = utils.group_segmentation_classes(label)  # group the segmentation classes as required

        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

        # ==================
        # collect some header info
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[2]))  # since axes 1 and 2 have been swapped
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0])
        ny_list.append(image.shape[1])  # since axes 1 and 2 have been swapped
        nz_list.append(image.shape[2])
        pat_names_list.append(patient_name)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        # PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1]]  # since axes 1 and 2 have been swapped

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their orientation matches that of the nci dataset
            # ============
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                       scale_vector,
                                       order=1,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                       scale_vector,
                                       order=0,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            # ============
            # crop or pad to make all slices the same size
            # ============
            image2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(image2d_rescaled, size[0], size[1])
            label2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(label2d_rescaled, size[0], size[1])

            # ============
            # append to list
            # ============
            image_list.append(image2d_rescaled_rotated_cropped)
            label_list.append(label2d_rescaled_rotated_cropped)

            # ============
            # increment counter
            # ============
            write_buffer += 1

            # ============
            # Writing needs to happen inside the loop over the slices
            # ============
            if write_buffer >= MAX_WRITE_BUFFER:
                counter_to = counter_from + write_buffer
                _write_range_to_hdf5(data, image_list, label_list, counter_from, counter_to)
                _release_tmp_memory(image_list, label_list)

                # ============
                # update counters
                # ============
                counter_from = counter_to
                write_buffer = 0

    # ============
    # write leftover data
    # ============
    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data, image_list, label_list, counter_from, counter_to)
    _release_tmp_memory(image_list, label_list)

    # ============
    # Write the small datasets - image resolutions, sizes, patient ids
    # ============
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))

    # ============
    # close the hdf5 file
    # ============
    hdf5_file.close()
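# ======================================================================
# Usage sketch (not from the original source): reading back the slice-wise
# hdf5 written by prepare_data above. The dataset names match the
# create_dataset calls; the file path is a hypothetical placeholder.
# ======================================================================
def _example_read_hdf5(hdf5_path='/data/preproc/data_2d.hdf5'):
    import h5py
    with h5py.File(hdf5_path, 'r') as f:
        images = f['images'][:]      # [num_slices, size[0], size[1]], float32 in [0, 1]
        labels = f['labels'][:]      # [num_slices, size[0], size[1]], uint8 class ids
        patnames = f['patnames'][:]  # byte strings (dtype "S10"), one per subject
    return images, labels, patnames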
def prepare_data(input_folder, output_file, site_name, idx_start, idx_end, protocol, size, depth, target_resolution, preprocessing_folder):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + site_name + '/*/'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(filenames, idx_start, idx_end, protocol, preprocessing_folder, depth)
    data['images'] = hdf5_file.create_dataset("images", [num_slices] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_slices] + list(size), dtype=np.uint8)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initialize counters
    # ===============================
    write_buffer = 0
    counter_from = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx])

        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        image = np.swapaxes(image, 1, 2)

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = np.swapaxes(label, 1, 2)
        label = utils.group_segmentation_classes(label)  # group the segmentation classes as required

        # ============
        # create a segmentation mask and use it to get rid of the skull in the image
        # ============
        label_mask = np.copy(label)
        label_mask[label > 0] = 1
        image = image * label_mask

        # ==================
        # crop out some portion of the image, which is all zeros (rough registration via visual inspection)
        # ==================
        if site_name == 'CALTECH':
            image = image[:, 80:, :]
            label = label[:, 80:, :]
        elif site_name == 'STANFORD':
            image, label = center_image_and_label(image, label)

        # plt.figure(); plt.imshow(image[:, :, 50], cmap='gray'); plt.title(patient_name); plt.show(); plt.close()

        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

        # ==================
        # collect some header info
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        # since axes 1 and 2 have been swapped; this matters when dealing with pixel dimensions
        py_list.append(float(image_hdr.get_zooms()[2]))
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0])
        # since axes 1 and 2 have been swapped; however, only the final axis locations
        # are relevant when dealing with shapes
        ny_list.append(image.shape[1])
        nz_list.append(image.shape[2])
        pat_names_list.append(patient_name)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        # PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA
        # ======================================================
        # since axes 1 and 2 have been swapped; this matters when dealing with pixel dimensions
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1]]

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their orientation matches that of the nci dataset
            # ============
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                       scale_vector,
                                       order=1,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                       scale_vector,
                                       order=0,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            # ============
            # crop or pad to make all slices the same size
            # ============
            image2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(image2d_rescaled, size[0], size[1])
            label2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(label2d_rescaled, size[0], size[1])

            # ============
            # append to list
            # ============
            image_list.append(image2d_rescaled_rotated_cropped)
            label_list.append(label2d_rescaled_rotated_cropped)

            write_buffer += 1

            # Writing needs to happen inside the loop over the slices
            if write_buffer >= MAX_WRITE_BUFFER:
                counter_to = counter_from + write_buffer
                _write_range_to_hdf5(data, image_list, label_list, counter_from, counter_to)
                _release_tmp_memory(image_list, label_list)

                # update counters
                counter_from = counter_to
                write_buffer = 0

    # write leftover data
    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data, image_list, label_list, counter_from, counter_to)
    _release_tmp_memory(image_list, label_list)

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))

    # close the hdf5 file
    hdf5_file.close()
def prepare_data(input_folder, output_file, idx_start, idx_end, protocol, size, depth, target_resolution, preprocessing_folder):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_subjects = idx_end - idx_start
    data['images'] = hdf5_file.create_dataset("images", [num_subjects] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_subjects] + list(size), dtype=np.uint8)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initiate counter
    # ===============================
    patient_counter = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)

        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        image = np.swapaxes(image, 1, 2)

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = np.swapaxes(label, 1, 2)
        label = utils.group_segmentation_classes(label)  # group the segmentation classes as required

        # ==================
        # collect some header info
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[2]))  # since axes 1 and 2 have been swapped
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0])
        ny_list.append(image.shape[2])  # since axes 1 and 2 have been swapped
        nz_list.append(image.shape[1])
        pat_names_list.append(patient_name)

        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        # rescale, crop / pad to make all images of the required size and resolution
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1],
                        image_hdr.get_zooms()[1] / target_resolution[2]]  # since axes 1 and 2 have been swapped

        image_rescaled = transform.rescale(image_normalized,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode='constant')

        label_onehot = utils.make_onehot_(label, nlabels=15)

        label_onehot_rescaled = transform.rescale(label_onehot,
                                                  scale_vector,
                                                  order=1,
                                                  preserve_range=True,
                                                  multichannel=True,
                                                  mode='constant')

        label_rescaled = np.argmax(label_onehot_rescaled, axis=-1)

        # ==================================
        # go through each z slice, crop or pad to a constant size and then append the resized slice
        # this ensures that the axes get arranged in the same orientation as they were during the 2d preprocessing
        # ==================================
        image_rescaled_cropped = []
        label_rescaled_cropped = []
        for zz in range(image_rescaled.shape[2]):
            image_rescaled_cropped.append(utils.crop_or_pad_slice_to_size(image_rescaled[:, :, zz], size[1], size[2]))
            label_rescaled_cropped.append(utils.crop_or_pad_slice_to_size(label_rescaled[:, :, zz], size[1], size[2]))
        image_rescaled_cropped = np.array(image_rescaled_cropped)
        label_rescaled_cropped = np.array(label_rescaled_cropped)

        # ============
        # append to list
        # ============
        image_list.append(image_rescaled_cropped)
        label_list.append(label_rescaled_cropped)

        # ============
        # write to file
        # ============
        _write_range_to_hdf5(data, image_list, label_list, patient_counter, patient_counter + 1)
        _release_tmp_memory(image_list, label_list)

        # update counter
        patient_counter += 1

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))

    # close the hdf5 file
    hdf5_file.close()
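# ======================================================================
# Why the one-hot detour for labels (a sketch, not from the original source):
# linearly interpolating integer class ids directly can create spurious
# intermediate classes at boundaries (e.g. values between class 0 and class 2
# round to class 1). Rescaling each one-hot channel with order=1 and taking the
# argmax avoids this. The helper below mirrors the make_onehot_ / rescale /
# argmax sequence above with plain numpy and skimage; multichannel=True, as
# used throughout this file, assumes an older skimage API.
# ======================================================================
def _example_onehot_label_rescale(label, scale_vector, nlabels=15):
    import numpy as np
    from skimage import transform
    onehot = np.eye(nlabels, dtype=np.float32)[label]  # [x, y, z, nlabels]
    onehot_rescaled = transform.rescale(onehot,
                                        scale_vector,
                                        order=1,
                                        preserve_range=True,
                                        multichannel=True,
                                        mode='constant')
    return np.argmax(onehot_rescaled, axis=-1).astype(np.uint8)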
def prepare_data(input_folder, preproc_folder, idx_start, idx_end, bias_correction):

    images = []
    affines = []
    patnames = []
    masks = []

    # read the foldernames
    foldernames = sorted(glob.glob(input_folder + '*/'))
    logging.info('Number of images in the dataset: %s' % str(len(foldernames)))

    # iterate through all indices
    for idx in range(len(foldernames)):

        # only consider images within the requested indices
        if (idx < idx_start) or (idx >= idx_end):
            logging.info('skipping subject: %d' % idx)
            continue

        # get the file name for this subject
        foldername = foldernames[idx]

        # extract the patient name
        _patname = foldername[foldername[:-1].rfind('/') + 1:-1]
        if _patname == 'A00033264':  # this subject has images of a different size
            continue

        # ====================================================
        # search for the segmentation file
        # ====================================================
        name = foldername + 'orig_labels_aligned_with_true_image.nii.gz'  # segmentation mask with ~100 classes
        logging.info('==============================================')
        logging.info('reading segmentation mask: %s' % name)

        # read the segmentation mask
        _seg_data, _seg_affine, _seg_header = utils.load_nii(name)

        # group the segmentation classes as required
        _seg_data = utils.group_segmentation_classes(_seg_data)

        # ====================================================
        # read the image file (optionally the bias-corrected version)
        # ====================================================
        if bias_correction:
            name = foldername + 'MPRAGE_n4.nii'  # read the bias-corrected image
        else:
            name = foldername + 'MPRAGE.nii'  # read the original image

        logging.info('reading image: %s' % name)
        _img_data, _img_affine, _img_header = utils.load_nii(name)
        # _img_header.get_zooms() = (1.0, 1.0, 1.0)

        # ============
        # create a segmentation mask and use it to get rid of the skull in the image
        # ============
        seg_mask = np.copy(_seg_data)
        seg_mask[_seg_data > 0] = 1
        img_masked = _img_data * seg_mask

        # normalise the image
        _img_data = utils.normalise_image(img_masked, norm_type='div_by_max')

        # ============
        # rescale the image and the segmentation mask so that their pixel size in mm matches that of the hcp images
        # ============
        img_rescaled = rescale(image=_img_data, scale=10 / 7, order=1, preserve_range=True, multichannel=False)
        seg_rescaled = rescale(image=_seg_data, scale=10 / 7, order=0, preserve_range=True, multichannel=False)

        # ============
        # a lot of the periphery is just zeros, so get rid of some of it
        # ============
        # define how much of the image can be cropped out as it consists of zeros
        x_start = 13
        x_end = -14
        y_start = 55
        y_end = -55
        z_start = 55 + 16 + 50
        z_end = -55 - 16 + 50
        # original images are 176 * 256 * 256
        # rescaling them makes them 251 * 366 * 366
        # cropping them down to 224 * 256 * 224

        img_rescaled = img_rescaled[x_start:x_end, y_start:y_end, z_start:z_end]
        seg_rescaled = seg_rescaled[x_start:x_end, y_start:y_end, z_start:z_end]

        # save the pre-processed segmentation ground truth
        utils.makefolder(preproc_folder + _patname)
        utils.save_nii(preproc_folder + _patname + '/preprocessed_gt15.nii', seg_rescaled, _seg_affine)
        if bias_correction:
            utils.save_nii(preproc_folder + _patname + '/preprocessed_image_n4.nii', img_rescaled, _img_affine)
        else:
            utils.save_nii(preproc_folder + _patname + '/preprocessed_image.nii', img_rescaled, _img_affine)

        # append to lists
        images.append(img_rescaled)
        affines.append(_img_affine)
        patnames.append(_patname)
        masks.append(seg_rescaled)

    # convert the lists to arrays
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)
    if bias_correction:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
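# ======================================================================
# Usage sketch (not from the original source): reading back the arrays saved by
# prepare_data above. The filename prefix mirrors config_details; the preproc
# folder and index range are hypothetical placeholders.
# ======================================================================
def _example_load_preprocessed(preproc_folder='/data/preproc/', idx_start=0, idx_end=10):
    import numpy as np
    config_details = 'from%dto%d_' % (idx_start, idx_end)
    images = np.load(preproc_folder + config_details + 'images_2d.npy')
    masks = np.load(preproc_folder + config_details + 'annotations15_2d.npy')
    # images: stack of x-z slices, float32 in [0, 1]
    # masks: same layout, uint8 with the 15 grouped classes
    return images, masks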