Example 1
def load_without_size_preprocessing(input_folder, site_name, idx, depth):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + site_name + '/*/'))

    # ==================
    # get file paths
    # ==================
    patient_name, image_path, label_path = get_image_and_label_paths(
        filenames[idx])

    # ============
    # read the image (normalisation to [0, 1] happens further below)
    # ============
    image, _, image_hdr = utils.load_nii(image_path)
    image = np.swapaxes(
        image, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets

    # ==================
    # read the label file
    # ==================
    label, _, _ = utils.load_nii(label_path)
    label = np.swapaxes(
        label, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    label = utils.group_segmentation_classes(
        label)  # group the segmentation classes as required

    # ============
    # create a segmentation mask and use it to get rid of the skull in the image
    # ============
    label_mask = np.copy(label)
    label_mask[label > 0] = 1
    image = image * label_mask

    # ==================
    # crop out portions of the image that are all zeros (rough registration via visual inspection)
    # ==================
    if site_name == 'CALTECH':
        image = image[:, 80:, :]
        label = label[:, 80:, :]
    elif site_name == 'STANFORD':
        image, label = center_image_and_label(image, label)

    # ==================
    # crop volume along z axis (as there are several zeros towards the ends)
    # ==================
    image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
    label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

    # ==================
    # normalize the image
    # ==================
    image = utils.normalise_image(image, norm_type='div_by_max')

    return image, label
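
A minimal usage sketch for the loader above; the data root, site name, and depth are hypothetical placeholders, and utils plus get_image_and_label_paths are assumed to be importable from the surrounding project:

image, label = load_without_size_preprocessing(input_folder='/data/abide/',  # hypothetical data root
                                               site_name='CALTECH',
                                               idx=0,
                                               depth=256)
# both volumes are skull-stripped and cropped/padded to 256 slices along z;
# image intensities lie in [0, 1] after the div_by_max normalisation
print(image.shape, label.shape)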
Example 2
def load_without_size_preprocessing(input_folder, idx, protocol,
                                    preprocessing_folder, depth):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))

    # ==================
    # get file paths
    # ==================
    patient_name, image_path, label_path = get_image_and_label_paths(
        filenames[idx], protocol, preprocessing_folder)

    # ============
    # read the image and normalize it to be between 0 and 1
    # ============
    image, _, image_hdr = utils.load_nii(image_path)
    image = np.swapaxes(
        image, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    image = utils.normalise_image(image, norm_type='div_by_max')

    # ==================
    # read the label file
    # ==================
    label, _, _ = utils.load_nii(label_path)
    label = np.swapaxes(
        label, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    label = utils.group_segmentation_classes(
        label)  # group the segmentation classes as required

    # ==================
    # crop volume along z axis (as there are several zeros towards the ends)
    # ==================
    image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
    label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

    return image, label
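
Both loaders above delegate the depth adjustment to utils.crop_or_pad_volume_to_size_along_z, whose implementation is not shown here. A minimal sketch consistent with the call sites (centre-crop along z when the volume is too deep, zero-pad symmetrically when it is too shallow) might look like this:

import numpy as np

def crop_or_pad_volume_to_size_along_z(vol, nz_target):
    nz = vol.shape[2]
    if nz >= nz_target:
        # centre-crop along the z axis
        start = (nz - nz_target) // 2
        return vol[:, :, start:start + nz_target]
    # zero-pad symmetrically along the z axis
    before = (nz_target - nz) // 2
    after = nz_target - nz - before
    return np.pad(vol, ((0, 0), (0, 0), (before, after)), mode='constant')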
Example 3
def prepare_data(input_folder, output_file, idx_start, idx_end, protocol, size,
                 target_resolution, preprocessing_folder):
    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a hdf5 file
    # =======================
    # hdf5_file = h5py.File(output_file, "w")
    #
    # # ===============================
    # # Create datasets for images and labels
    # # ===============================
    # data = {}
    # num_subjects = idx_end - idx_start
    #
    # data['images'] = hdf5_file.create_dataset("images", [num_subjects] + list(size), dtype=np.float32)
    # data['labels'] = hdf5_file.create_dataset("labels", [num_subjects] + list(size), dtype=np.uint8)
    #
    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initialize counter
    # ===============================
    patient_counter = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):
        logging.info('Volume {} of {}...'.format(idx, idx_end))

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(
            filenames[idx], protocol, preprocessing_folder)

        # ============
        # read the image (normalisation to [0, 1] happens further below)
        # ============
        image, _, image_hdr = utils.load_nii(image_path)

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        label = utils.group_segmentation_classes(
            label)  # group the segmentation classes as required

        # # ==================
        # # collect some header info.
        # # ==================
        # px_list.append(float(image_hdr.get_zooms()[0]))
        # py_list.append(float(image_hdr.get_zooms()[1]))
        # pz_list.append(float(image_hdr.get_zooms()[2]))
        # nx_list.append(image.shape[0])
        # ny_list.append(image.shape[1])
        # nz_list.append(image.shape[2])
        # pat_names_list.append(patient_name)

        # ==================
        # crop volume along all axes from the ends (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_x(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_x(label, 256)
        image = utils.crop_or_pad_volume_to_size_along_y(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_y(label, 256)
        image = utils.crop_or_pad_volume_to_size_along_z(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_z(label, 256)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        # rescale, crop / pad to make all images of the required size and resolution
        # ======================================================
        scale_vector = [
            image_hdr.get_zooms()[0] / target_resolution[0],
            image_hdr.get_zooms()[1] / target_resolution[1],
            image_hdr.get_zooms()[2] / target_resolution[2]
        ]

        image_rescaled = transform.rescale(image_normalized,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode='constant')

        # label_onehot = utils.make_onehot(label, nlabels=15)
        #
        # label_onehot_rescaled = transform.rescale(label_onehot,
        #                                           scale_vector,
        #                                           order=1,
        #                                           preserve_range=True,
        #                                           multichannel=True,
        #                                           mode='constant')
        #
        # label_rescaled = np.argmax(label_onehot_rescaled, axis=-1)
        #
        # # ============
        # # the images and labels have been rescaled to the desired resolution.
        # # write them to the hdf5 file now.
        # # ============
        # image_list.append(image_rescaled)
        # label_list.append(label_rescaled)

        # ============
        # write each slice along axis 1 of the rescaled volume to file as a jpeg
        # ============
        volume_dir = os.path.join(preprocessing_folder,
                                  'volume_{:06d}'.format(idx))
        os.makedirs(volume_dir, exist_ok=True)
        for i in range(size[1]):
            slice_path = os.path.join(volume_dir,
                                      'slice_{:06d}.jpeg'.format(i))
            slice_2d = image_rescaled[:, i, :] * 255  # map [0, 1] intensities to [0, 255]
            slice_img = Image.fromarray(slice_2d.astype(np.uint8))
            slice_img.save(slice_path)
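
utils.normalise_image with norm_type='div_by_max' appears throughout these examples. Its implementation is not shown; one plausible minimal reading, given the comments promising intensities between 0 and 1, is a division by the volume maximum (the zero-max guard is an assumption, not confirmed by the source):

import numpy as np

def normalise_image(image, norm_type='div_by_max'):
    if norm_type == 'div_by_max':
        max_val = np.max(image)
        # avoid dividing by zero for an all-zero volume (assumption)
        return image / max_val if max_val > 0 else image
    raise ValueError('unknown norm_type: %s' % norm_type)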
Example 4
def prepare_data(input_folder, preproc_folder, protocol, idx_start, idx_end):

    images = []
    affines = []
    patnames = []
    masks = []

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # ========================
    # iterate through the requested indices
    # ========================
    for idx in range(idx_start, idx_end):

        logging.info(
            '============================================================')

        # ========================
        # get the file name for this subject
        # ========================
        filename = filenames[idx]

        # ========================
        # define how much of the image can be cropped out as it consists of zeros
        # ========================
        x_start = 18
        x_end = -18
        y_start = 28
        y_end = -27
        z_start = 2
        z_end = -34
        # original images are 260 * 311 * 260
        # cropping them down to 224 * 256 * 224

        # ========================
        # read the contents inside the top-level subject directory
        # ========================
        with zipfile.ZipFile(filename, 'r') as zfile:

            # ========================
            # search for the relevant files
            # ========================
            for name in zfile.namelist():

                # ========================
                # search for files inside the T1w directory
                # ========================
                if re.search(r'/T1w/', name) is not None:

                    # ========================
                    # search for .gz files inside the T1w directory
                    # ========================
                    if re.search(r'\.gz$', name) is not None:

                        # ========================
                        # get the protocol image
                        # ========================
                        if re.search(protocol + 'acpc_dc_restore_brain',
                                     name) is not None:

                            logging.info('reading image: %s' % name)

                            _filepath = zfile.extract(
                                name, sys_config.preproc_folder_hcp
                            )  # extract the image filepath

                            _patname = name[:name.find(
                                '/')]  # extract the patient name

                            _img_data, _img_affine, _img_header = utils.load_nii(
                                _filepath)  # read the 3d image

                            _img_data = _img_data[
                                x_start:x_end, y_start:y_end, z_start:
                                z_end]  # discard some pixels as they are always zero.

                            _img_data = utils.normalise_image(
                                _img_data, norm_type='div_by_max'
                            )  # normalise the image (volume wise)

                            savepath = sys_config.preproc_folder_hcp + _patname + '/preprocessed_image' + protocol + '.nii'  # save the pre-processed image
                            utils.save_nii(savepath, _img_data, _img_affine,
                                           _img_header)

                            images.append(
                                _img_data
                            )  # append to the list of all images, affines and patient names
                            affines.append(_img_affine)
                            patnames.append(_patname)

                        # ========================
                        # get the segmentation mask
                        # ========================
                        if re.search(
                                r'aparc\.aseg', name
                        ) is not None:  # segmentation mask with ~100 classes

                            if re.search('T1wDividedByT2w_', name) is None:

                                logging.info('reading mask: %s' % name)

                                _segpath = zfile.extract(
                                    name, sys_config.preproc_folder_hcp
                                )  # extract the segmentation mask

                                _patname = name[:name.find(
                                    '/')]  # extract the patient name

                                _seg_data, _seg_affine, _seg_header = utils.load_nii(
                                    _segpath)  # read the segmentation mask

                                _seg_data = _seg_data[
                                    x_start:x_end, y_start:y_end, z_start:
                                    z_end]  # discard some pixels as they are always zero.

                                _seg_data = utils.group_segmentation_classes(
                                    _seg_data
                                )  # group the segmentation classes as required

                                savepath = sys_config.preproc_folder_hcp + _patname + '/preprocessed_gt15.nii'  # save the pre-processed segmentation ground truth
                                utils.save_nii(savepath, _seg_data,
                                               _seg_affine, _seg_header)

                                masks.append(
                                    _seg_data
                                )  # append to the list of all masks

    # ========================
    # convert the lists to arrays
    # ========================
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # ========================
    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    # ========================
    logging.info('Saving pre-processed files...')
    config_details = '%sfrom%dto%d_' % (protocol, idx_start, idx_end)
    filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'
    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
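
A hedged usage sketch for the zip-based prepare_data above. The folder paths are hypothetical, and protocol is whatever prefix makes protocol + 'acpc_dc_restore_brain' match the file names inside the zips ('T1w_' is an assumption here):

images, masks, affines, patnames = prepare_data(
    input_folder='/data/hcp_zips/',        # hypothetical folder containing the subject zips
    preproc_folder='/data/hcp_preproc/',   # hypothetical output folder
    protocol='T1w_',                       # assumed prefix; see the re.search calls above
    idx_start=0,
    idx_end=20)
# images and masks come back as stacks of x-z slices after the y-axis merge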
Example 5
def prepare_data(input_folder,
                 output_file,
                 idx_start,
                 idx_end,
                 protocol,
                 size,
                 depth,
                 target_resolution,
                 preprocessing_folder):

    # ========================    
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(filenames,
                              idx_start,
                              idx_end,
                              protocol,
                              preprocessing_folder,
                              depth)
    
    # ===============================
    # the sizes of the image and label arrays are set as: [(number of coronal slices per subject*number of subjects), size of coronal slices]
    # ===============================
    data['images'] = hdf5_file.create_dataset("images", [num_slices] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_slices] + list(size), dtype=np.uint8)
    
    # ===============================
    # initialize lists
    # ===============================        
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []
    
    # ===============================      
    # initialize counters
    # ===============================        
    write_buffer = 0
    counter_from = 0
    
    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):
        
        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)
        
        # ============
        # read the image (normalisation to [0, 1] happens further below)
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        image = np.swapaxes(image, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        
        # ==================
        # read the label file
        # ==================        
        label, _, _ = utils.load_nii(label_path)        
        label = np.swapaxes(label, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = utils.group_segmentation_classes(label) # group the segmentation classes as required
                
        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)     

        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[2])) # since axes 1 and 2 have been swapped
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0]) 
        ny_list.append(image.shape[1]) # since axes 1 and 2 have been swapped
        nz_list.append(image.shape[2])
        pat_names_list.append(patient_name)
        
        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')
                        
        # ======================================================  
        ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1]] # since axes 1 and 2 have been swapped

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their orientation matches that of the nci dataset
            # ============            
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                                  scale_vector,
                                                  order=1,
                                                  preserve_range=True,
                                                  multichannel=False,
                                                  mode = 'constant')
 
            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                                  scale_vector,
                                                  order=0,
                                                  preserve_range=True,
                                                  multichannel=False,
                                                  mode='constant')
            
            # ============            
            # crop or pad to make all slices the same size
            # ============            
            image2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(image2d_rescaled, size[0], size[1])
            label2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(label2d_rescaled, size[0], size[1])

            # ============   
            # append to list
            # ============   
            image_list.append(image2d_rescaled_rotated_cropped)
            label_list.append(label2d_rescaled_rotated_cropped)

            # ============   
            # increment counter
            # ============   
            write_buffer += 1

            # ============   
            # Writing needs to happen inside the loop over the slices
            # ============   
            if write_buffer >= MAX_WRITE_BUFFER:

                counter_to = counter_from + write_buffer

                _write_range_to_hdf5(data,
                                     image_list,
                                     label_list,
                                     counter_from,
                                     counter_to)
                
                _release_tmp_memory(image_list,
                                    label_list)

                # ============   
                # update counters 
                # ============   
                counter_from = counter_to
                write_buffer = 0
        
    # ============   
    # write leftover data
    # ============   
    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data,
                         image_list,
                         label_list,
                         counter_from,
                         counter_to)
    _release_tmp_memory(image_list,
                        label_list)

    # ============   
    # Write the small datasets - image resolutions, sizes, patient ids
    # ============   
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))
    
    # ============   
    # close the hdf5 file
    # ============   
    hdf5_file.close()
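
Example 5 relies on three helpers that are not reproduced here. The sketches below are assumptions consistent with the call sites, not the project's actual code: count_slices can return depth slices per requested subject (every volume is cropped or padded to depth along z), _write_range_to_hdf5 flushes the buffered slices into the preallocated datasets, and _release_tmp_memory empties the buffers in place:

import numpy as np

def count_slices(filenames, idx_start, idx_end, protocol, preprocessing_folder, depth):
    # every volume ends up with exactly `depth` slices along z
    return (idx_end - idx_start) * depth

def _write_range_to_hdf5(data, image_list, label_list, counter_from, counter_to):
    # flush the buffered 2d slices into the preallocated hdf5 datasets
    data['images'][counter_from:counter_to, ...] = np.asarray(image_list, dtype=np.float32)
    data['labels'][counter_from:counter_to, ...] = np.asarray(label_list, dtype=np.uint8)

def _release_tmp_memory(image_list, label_list):
    # clear the buffers in place so the caller's list references stay valid
    image_list.clear()
    label_list.clear()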
Example 6
def prepare_data(input_folder, output_file, site_name, idx_start, idx_end,
                 protocol, size, depth, target_resolution,
                 preprocessing_folder):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + site_name + '/*/'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(filenames, idx_start, idx_end, protocol,
                              preprocessing_folder, depth)

    data['images'] = hdf5_file.create_dataset("images",
                                              [num_slices] + list(size),
                                              dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels",
                                              [num_slices] + list(size),
                                              dtype=np.uint8)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initialize counters
    # ===============================
    write_buffer = 0
    counter_from = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(
            filenames[idx])

        # ============
        # read the image (normalisation to [0, 1] happens further below)
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        image = np.swapaxes(
            image, 1, 2
        )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        label = np.swapaxes(
            label, 1, 2
        )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = utils.group_segmentation_classes(
            label)  # group the segmentation classes as required

        # ============
        # create a segmentation mask and use it to get rid of the skull in the image
        # ============
        label_mask = np.copy(label)
        label_mask[label > 0] = 1
        image = image * label_mask

        # ==================
        # crop out portions of the image that are all zeros (rough registration via visual inspection)
        # ==================
        if site_name == 'CALTECH':
            image = image[:, 80:, :]
            label = label[:, 80:, :]
        elif site_name == 'STANFORD':
            image, label = center_image_and_label(image, label)

        # plt.figure(); plt.imshow(image[:,:,50], cmap='gray'); plt.title(patient_name); plt.show(); plt.close()

        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(
            float(image_hdr.get_zooms()[2])
        )  # since axes 1 and 2 have been swapped. this is important when dealing with pixel dimensions
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0])
        ny_list.append(
            image.shape[1]
        )  # since axes 1 and 2 have been swapped. however, only the final axis locations are relevant when dealing with shapes
        nz_list.append(image.shape[2])
        pat_names_list.append(patient_name)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
        # ======================================================
        scale_vector = [
            image_hdr.get_zooms()[0] / target_resolution[0],
            image_hdr.get_zooms()[2] / target_resolution[1]
        ]  # since axes 1 and 2 have been swapped. this is important when dealing with pixel dimensions

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their orientation matches that of the nci dataset
            # ============
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                       scale_vector,
                                       order=1,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                       scale_vector,
                                       order=0,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            # ============
            # crop or pad to make all slices the same size
            # ============
            image2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(
                image2d_rescaled, size[0], size[1])
            label2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(
                label2d_rescaled, size[0], size[1])

            # ============
            # append to list
            # ============
            image_list.append(image2d_rescaled_rotated_cropped)
            label_list.append(label2d_rescaled_rotated_cropped)

            write_buffer += 1

            # Writing needs to happen inside the loop over the slices
            if write_buffer >= MAX_WRITE_BUFFER:

                counter_to = counter_from + write_buffer

                _write_range_to_hdf5(data, image_list, label_list,
                                     counter_from, counter_to)

                _release_tmp_memory(image_list, label_list)

                # update counters
                counter_from = counter_to
                write_buffer = 0

    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data, image_list, label_list, counter_from,
                         counter_to)
    _release_tmp_memory(image_list, label_list)

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames',
                             data=np.asarray(pat_names_list, dtype="S10"))

    # close the hdf5 file after the loop over all subjects
    hdf5_file.close()
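
The slice-wise examples also call utils.crop_or_pad_slice_to_size, the 2D analogue of the z-axis helper sketched earlier. A minimal centre-crop / zero-pad version, again offered as an assumption about the unshown implementation:

import numpy as np

def crop_or_pad_slice_to_size(slice_2d, nx, ny):
    out = np.zeros((nx, ny), dtype=slice_2d.dtype)
    x, y = slice_2d.shape
    x_c, y_c = min(x, nx), min(y, ny)            # extent of the region that survives
    x_s, y_s = (x - x_c) // 2, (y - y_c) // 2    # centre-crop offsets in the source
    x_o, y_o = (nx - x_c) // 2, (ny - y_c) // 2  # centre-pad offsets in the output
    out[x_o:x_o + x_c, y_o:y_o + y_c] = slice_2d[x_s:x_s + x_c, y_s:y_s + y_c]
    return out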
Example 7
def prepare_data(input_folder,
                 output_file,
                 idx_start,
                 idx_end,
                 protocol,
                 size,
                 depth,
                 target_resolution,
                 preprocessing_folder):

    # ========================    
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_subjects = idx_end - idx_start
    
    data['images'] = hdf5_file.create_dataset("images", [num_subjects] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_subjects] + list(size), dtype=np.uint8)
    
    # ===============================
    # initialize lists
    # ===============================        
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []
    
    # ===============================        
    # initialize counter
    # ===============================        
    patient_counter = 0
    
    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):
        
        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)
        
        # ============
        # read the image (normalisation to [0, 1] happens further below)
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        image = np.swapaxes(image, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        
        # ==================
        # read the label file
        # ==================        
        label, _, _ = utils.load_nii(label_path)        
        label = np.swapaxes(label, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = utils.group_segmentation_classes(label) # group the segmentation classes as required
        
        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[2])) # since axes 1 and 2 have been swapped
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0]) 
        ny_list.append(image.shape[2]) # since axes 1 and 2 have been swapped
        nz_list.append(image.shape[1])
        pat_names_list.append(patient_name)
        
        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)     
        
        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')
                        
        # ======================================================  
        # rescale, crop / pad to make all images of the required size and resolution
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1],
                        image_hdr.get_zooms()[1] / target_resolution[2]] # since axes 1 and 2 have been swapped
        
        image_rescaled = transform.rescale(image_normalized,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode = 'constant')

        label_onehot = utils.make_onehot_(label, nlabels=15)

        label_onehot_rescaled = transform.rescale(label_onehot,
                                                  scale_vector,
                                                  order=1,
                                                  preserve_range=True,
                                                  multichannel=True,
                                                  mode='constant')
        
        label_rescaled = np.argmax(label_onehot_rescaled, axis=-1)
        
        # ==================================
        # go through each z slice, crop or pad to a constant size and then append the resized 
        # this will ensure that the axes get arranged in the same orientation as they were during the 2d preprocessing
        # ==================================
        image_rescaled_cropped = []
        label_rescaled_cropped = []
        for zz in range(image_rescaled.shape[2]):
            image_rescaled_cropped.append(utils.crop_or_pad_slice_to_size(image_rescaled[:,:,zz], size[1], size[2]))
            label_rescaled_cropped.append(utils.crop_or_pad_slice_to_size(label_rescaled[:,:,zz], size[1], size[2]))
        image_rescaled_cropped = np.array(image_rescaled_cropped)
        label_rescaled_cropped = np.array(label_rescaled_cropped)

        # ============   
        # append to list
        # ============   
        image_list.append(image_rescaled_cropped)
        label_list.append(label_rescaled_cropped)

        # ============   
        # write to file
        # ============   
        _write_range_to_hdf5(data,
                             image_list,
                             label_list,
                             patient_counter,
                             patient_counter+1)
        
        _release_tmp_memory(image_list,
                            label_list)
        
        # update counter
        patient_counter += 1

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))
    
    # close the hdf5 file after the loop over all subjects
    hdf5_file.close()
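
A hypothetical invocation of the 3D variant above. All values are placeholders: size is (depth after rescaling, nx, ny), since each subject contributes one whole volume of size[1] x size[2] slices stacked along the first axis:

prepare_data(input_folder='/data/hcp_zips/',            # hypothetical folder of subject zips
             output_file='/data/hcp_preproc/data_3d.hdf5',
             idx_start=0,
             idx_end=20,
             protocol='T1w_',                           # assumed prefix
             size=(64, 256, 256),                       # placeholder target array size
             depth=64,                                  # placeholder depth before rescaling
             target_resolution=(0.7, 0.7, 0.7),         # placeholder resolution in mm
             preprocessing_folder='/data/hcp_preproc/')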
Example 8
def prepare_data(input_folder, preproc_folder, idx_start, idx_end,
                 bias_correction):

    images = []
    affines = []
    patnames = []
    masks = []

    # read the foldernames
    foldernames = sorted(glob.glob(input_folder + '*/'))
    logging.info('Number of images in the dataset: %s' % str(len(foldernames)))

    # iterate through all indices
    for idx in range(len(foldernames)):

        # only consider images within the indices requested
        if (idx < idx_start) or (idx >= idx_end):
            logging.info('skipping subject: %d' % idx)
            continue

        # get the file name for this subject
        foldername = foldernames[idx]

        # extract the patient name
        _patname = foldername[foldername[:-1].rfind('/') + 1:-1]
        if _patname == 'A00033264':  # this subject has images of a different size
            continue

        # ====================================================
        # search for the segmentation file
        # ====================================================
        name = foldername + 'orig_labels_aligned_with_true_image.nii.gz'  # segmentation mask with ~100 classes
        logging.info('==============================================')
        logging.info('reading segmentation mask: %s' % name)

        # read the segmentation mask
        _seg_data, _seg_affine, _seg_header = utils.load_nii(name)

        # group the segmentation classes as required
        _seg_data = utils.group_segmentation_classes(_seg_data)

        # ====================================================
        # read the image file
        # ====================================================
        if bias_correction:
            name = foldername + 'MPRAGE_n4.nii'  # read the original image
        else:
            name = foldername + 'MPRAGE.nii'  # read the original image

        # ====================================================
        # N4 bias correction (optional) is expected to have been run
        # beforehand, producing the MPRAGE_n4.nii file selected above
        # ====================================================

        # read the image
        logging.info('reading image: %s' % name)
        _img_data, _img_affine, _img_header = utils.load_nii(name)
        # note: _img_header.get_zooms() is (1.0, 1.0, 1.0) for these images

        # ============
        # create a segmentation mask and use it to get rid of the skull in the image
        # ============
        seg_mask = np.copy(_seg_data)
        seg_mask[_seg_data > 0] = 1
        img_masked = _img_data * seg_mask

        # normalise the image
        _img_data = utils.normalise_image(img_masked, norm_type='div_by_max')

        # ============
        # rescale the image and the segmentation mask so that their pixel size in mm matches that of the hcp images
        # ============
        img_rescaled = rescale(image=_img_data,
                               scale=10 / 7,
                               order=1,
                               preserve_range=True,
                               multichannel=False)
        seg_rescaled = rescale(image=_seg_data,
                               scale=10 / 7,
                               order=0,
                               preserve_range=True,
                               multichannel=False)

        # ============
        # A lot of the periphery is just zeros, so get rid of some of it
        # ============
        # define how much of the image can be cropped out as it consists of zeros
        x_start = 13
        x_end = -14
        y_start = 55
        y_end = -55
        z_start = 55 + 16 + 50
        z_end = -55 - 16 + 50
        # original images are 176 * 256 * 256
        # rescaling them makes them 251 * 366 * 366
        # cropping them down to 224 * 256 * 224
        img_rescaled = img_rescaled[x_start:x_end, y_start:y_end,
                                    z_start:z_end]
        seg_rescaled = seg_rescaled[x_start:x_end, y_start:y_end,
                                    z_start:z_end]

        # save the pre-processed segmentation ground truth
        utils.makefolder(preproc_folder + _patname)
        utils.save_nii(preproc_folder + _patname + '/preprocessed_gt15.nii',
                       seg_rescaled, _seg_affine)
        if bias_correction:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image_n4.nii',
                img_rescaled, _img_affine)
        else:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image.nii',
                img_rescaled, _img_affine)

        # append to lists
        images.append(img_rescaled)
        affines.append(_img_affine)
        patnames.append(_patname)
        masks.append(seg_rescaled)

    # convert the lists to arrays
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)

    if bias_correction:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
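
Finally, a hypothetical call to the folder-based variant above; the data root is a placeholder, and the function both writes per-subject NIfTI files under preproc_folder and returns the merged 2D stacks:

images, masks, affines, patnames = prepare_data(
    input_folder='/data/mprage_subjects/',  # hypothetical folder of per-subject directories
    preproc_folder='/data/preproc/',        # hypothetical output folder
    idx_start=0,
    idx_end=10,
    bias_correction=False)                  # True reads the N4-corrected MPRAGE_n4.nii instead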