def data_gen(self, list_IDs_temp, pos_sample):
    """
    Builds one batch made of a single slice per listed volume.

    For every ID the image is read from `self.data_dirs[0]` and the mask from
    `self.data_dirs[1]` with `np.load`, a trailing channel axis is appended,
    both arrays are passed through `reshape` to a common shape and one slice
    is picked from each.
    Args:
        list_IDs_temp: batched list IDs; usually done by __getitem__
        pos_sample: truthy -> slice index comes from `get_positive_idx(mask)[0]`;
            falsy -> slice index comes from `get_random_slice_idx(image)`
    Returns:
        tuple of two numpy arrays: x, y (each stacked with `np.stack`)
    """
    batch_x, batch_y = [], []
    for fname in list_IDs_temp:
        # same file name is looked up in both the image and the mask directory
        image = np.load(os.path.join(self.data_dirs[0], fname))
        image = np.expand_dims(image, -1)
        mask = np.load(os.path.join(self.data_dirs[1], fname))
        mask = np.expand_dims(mask, -1)

        # Target shape shared by the batch so the arrays can be stacked.
        if self.dynamic_padding_z:
            # keep this volume's own first-axis length instead of a fixed one
            target_shape = (image.shape[0], ) + self.max_patient_shape
        else:
            target_shape = self.max_patient_shape

        # second argument of `reshape` is presumably the fill value: the image
        # uses its own minimum, the mask uses 0 -- TODO confirm against `reshape`
        image = reshape(image, image.min(), target_shape + (self.n_channels, ))
        mask = reshape(mask, 0, target_shape + (self.n_classes, ))

        # choose which slice of the volume goes into the batch
        if pos_sample:
            chosen = get_positive_idx(mask)[0]
        else:
            chosen = get_random_slice_idx(image)

        batch_x.append(image[chosen])
        batch_y.append(mask[chosen])

    return (np.stack(batch_x), np.stack(batch_y))
Esempio n. 2
0
    def data_gen(self, list_IDs_temp, pos_sample):
        """
        Builds one batch made of a single slice per listed volume.

        Each ID is loaded with `load_data` from `self.data_dirs[0]` (image) and
        `self.data_dirs[1]` (mask), given a channel axis when needed, optionally
        converted with `get_multi_class_labels`, passed through `reshape` to a
        common shape, validated with `sanity_checks` and reduced to one slice.
        Args:
            list_IDs_temp: batched list IDs; usually done by __getitem__
            pos_sample: truthy -> slice index comes from `get_positive_idx(mask)[0]`;
                falsy -> slice index comes from `get_random_slice_idx(image)`
        Returns:
            tuple of two numpy arrays: x, y (each stacked with `np.stack`)
        """
        batch_x, batch_y = [], []
        for fname in list_IDs_temp:
            # same file name is looked up in both the image and the mask directory
            image = load_data(os.path.join(self.data_dirs[0], fname))
            mask = load_data(os.path.join(self.data_dirs[1], fname))

            # A last axis that differs from n_channels is taken to mean that
            # the channel dimension is missing.
            if image.shape[-1] != self.n_channels:
                image = add_channel(image)
            # NOTE(review): the mask is also compared against n_channels (not
            # n_classes) -- confirm this is intended.
            if mask.shape[-1] != self.n_channels:
                mask = add_channel(mask)

            # no point to run this when binary (foreground/background)
            if self.n_classes > 1:
                mask = get_multi_class_labels(mask,
                                              n_labels=self.n_classes,
                                              remove_background=True)

            # Target shape shared by the batch so the arrays can be stacked.
            if self.dynamic_padding_z:
                # keep this volume's own first-axis length instead of a fixed one
                target_shape = (image.shape[0], ) + self.max_patient_shape
            else:
                target_shape = self.max_patient_shape

            # second argument of `reshape` is presumably the fill value: the
            # image uses its own minimum, the mask uses 0 -- TODO confirm
            image = reshape(image, image.min(),
                            target_shape + (self.n_channels, ))
            mask = reshape(mask, 0, target_shape + (self.n_classes, ))
            assert sanity_checks(image, mask)

            # choose which slice of the volume goes into the batch
            if pos_sample:
                chosen = get_positive_idx(mask)[0]
            else:
                chosen = get_random_slice_idx(image)

            batch_x.append(image[chosen])
            batch_y.append(mask[chosen])

        return (np.stack(batch_x), np.stack(batch_y))
Esempio n. 3
0
    def data_gen(self, list_IDs_temp):
        """
        Generates a batch of whole (un-sliced) padded images and masks.

        Each ID is loaded with `load_data` from `self.data_dirs[0]` (image) and
        `self.data_dirs[1]` (mask), given a channel axis when needed, optionally
        converted with `get_multi_class_labels`, passed through `reshape` to
        `self.max_patient_shape` and validated with `sanity_checks`.
        Args:
            list_IDs_temp: batched list IDs; usually done by __getitem__
        Returns:
            tuple of two numpy arrays: x, y (each stacked with `np.stack`)
        Raises:
            AssertionError: if an array does not have `self.ndim + 1` dimensions
                after a channel axis was added, or if `sanity_checks` fails.
        """
        images_x = []
        images_y = []
        for fname in list_IDs_temp:
            # same file name is looked up in both the image and the mask directory
            x_train = load_data(os.path.join(self.data_dirs[0], fname))
            y_train = load_data(os.path.join(self.data_dirs[1], fname))

            if x_train.shape[-1] != self.n_channels:
                # Adds channel in case there is no channel dimension
                x_train = add_channel(x_train)
                # Adjacent literals are used instead of a backslash inside the
                # string so the message carries no run of indentation spaces.
                assert len(x_train.shape) == self.ndim + 1, (
                    "Input shape must be the shape (x,y, n_channels) "
                    "or (x, y, z, n_channels)")

            # NOTE(review): the mask is also compared against n_channels (not
            # n_classes) -- confirm this is intended.
            if y_train.shape[-1] != self.n_channels:
                # Adds channel in case there is no channel dimension
                y_train = add_channel(y_train)
                assert len(y_train.shape) == self.ndim + 1, (
                    "Input labels must be the shape (x,y, n_channels) "
                    "or (x, y, z, n_channels)")

            if self.n_classes > 1:  # no point to run this when binary (foreground/background)
                y_train = get_multi_class_labels(y_train,
                                                 n_labels=self.n_classes,
                                                 remove_background=True)

            # Padding to the max patient shape (so the arrays can be stacked);
            # the image is filled with its own minimum, the mask with 0.
            x_train = reshape(x_train, x_train.min(),
                              self.max_patient_shape + (self.n_channels, ))
            y_train = reshape(y_train, 0,
                              self.max_patient_shape + (self.n_classes, ))

            assert sanity_checks(x_train, y_train)
            images_x.append(x_train)
            images_y.append(y_train)

        input_data, seg_masks = np.stack(images_x), np.stack(images_y)
        return (input_data, seg_masks)
Esempio n. 4
0
 def test_undo_reshape_padding(self):
     """
     Checks the round trip `reshape` -> `undo_reshape_padding`: undoing the padding of
     an image reshaped to (512, 512, 512, n_channels) must give back the original image.
     """
     original = self.label_image_3D
     padded_shape = (512, 512, 512, self.n_channels)
     # pad the label image with zeros up to the larger shape
     padded = reshape(original, append_value=0, new_shape=padded_shape)
     # strip the padding again, using the shape recorded before reshaping
     restored = undo_reshape_padding(padded, original.shape)
     # element-wise (and shape-wise) equality with the untouched input
     self.assertTrue(np.array_equal(restored, original))
Esempio n. 5
0
def pred_data_2D_per_sample(model, x_dir, y_dir, fnames, pad_shape=(256, 320), batch_size=2,
                            mean_patient_shape=(115, 320, 232), ct=False):
    """
    Loads raw data, preprocesses it, predicts 3D volumes slicewise (2D) one sample at a time, and pads to the original shape.
    Assumptions:
        The segmentation task you're working with is binary.
        The multi-output models output only have two outputs: (prediction mask, reconstruction mask)
        The .nii.gz files have the shape: (x, y, z) where z is the number of slices.
    Args:
        model: keras.models.Model instance
        x_dir: path to test images
        y_dir: path to the corresponding test masks
        fnames: files to evaluate in the directories; assuming that the input and labels are the same name
            * if it's None, we assume that it's all of the files in x_dir.
        pad_shape: of size (x,y); doesn't include batch size and channels
        batch_size: prediction batch size
        mean_patient_shape: (z,x,y) representing the average shape. Defaults to (115, 320, 232).
        ct: whether or not the data is a CT scan or not. Defaults to False.
    Returns:
        actual_y: actual labels
        padded_pred: stacked, thresholded predictions (padded to original shape)
        padded_recon: stacked, properly padded reconstruction. None if the model only outputs segmentations.
        orig_images: original input images
    Raises:
        ValueError: from `np.vstack` when `fnames` is empty or when the per-volume
            arrays (including the reconstructions) cannot be stacked together.
    """
    # can automatically infer the filenames to use (ensure that there are no junk files in x_dir)
    if fnames is None:
        fnames = os.listdir(x_dir)
    # lists that hold the per-volume arrays
    y_list = []
    pred_list = []
    recon_list = []
    orig_list = []
    for fname in fnames:
        # the image and its mask share the same file name in their directories
        x = nib.load(os.path.join(x_dir, fname))
        y = nib.load(os.path.join(y_dir, fname))
        # per the original author, nii_to_np transposes the axes to (z, x, y)
        orig_images, actual_label = nii_to_np(x), nii_to_np(y)
        orig_shape = orig_images.shape + (1, )
        # preprocessing; the preprocessed mask is not needed for prediction
        preprocessed_x, _, coords = isensee_preprocess(
            x, y, orig_spacing=None, get_coords=True, ct=ct,
            mean_patient_shape=mean_patient_shape)
        # pad to model input shape (predicting on a slicewise basis);
        # unique to each volume because the n_slice varies
        _pad_shape = (preprocessed_x.shape[0], ) + tuple(pad_shape)
        # preparing the shape for the model (reshaping to model input shape and adding a channel dimension)
        reshaped_x = np.expand_dims(
            reshape(preprocessed_x, preprocessed_x.min(), new_shape=_pad_shape),
            -1)
        # prediction
        print("Predicting: ", fname)
        predicted = model.predict(reshaped_x, batch_size=batch_size)
        # inferring that the model has a reconstruction decoder based on the outputted predictions
        if isinstance(predicted, (list, tuple)):
            predicted, reconstruction = predicted
            # properly converting the reconstruction to the original shape
            padded_recon = undo_reshape_and_nonint_extraction(
                reconstruction, prior_reshape_shape=preprocessed_x.shape,
                orig_shape=orig_shape, coords=coords, pad_value=0)
            recon_list.append(padded_recon)
        # thresholding (in place, so NaN entries are left untouched)
        predicted[predicted >= 0.5] = 1
        predicted[predicted < 0.5] = 0
        # properly converting the prediction mask to the original shape
        padded_pred = undo_reshape_and_nonint_extraction(
            predicted, prior_reshape_shape=preprocessed_x.shape,
            orig_shape=orig_shape, coords=coords, pad_value=0)
        y_list.append(actual_label)
        pred_list.append(padded_pred)
        orig_list.append(orig_images)
    # stacking the lists
    actual_y = np.vstack(y_list)
    padded_pred = np.vstack(pred_list)
    orig_images = np.vstack(orig_list)
    # Only the "no reconstruction output" case maps to None; a blanket
    # `except ValueError` would also hide genuine shape mismatches.
    padded_recon = np.vstack(recon_list) if recon_list else None
    return (actual_y, padded_pred, padded_recon, orig_images)