Code Example #1
File: test_pad.py  Project: soumickmj/torchio
 def test_pad(self):
     image = self.sample_subject.t1
     padding = 1, 2, 3, 4, 5, 6
     sitk_image = image.as_sitk()
     low, high = padding[::2], padding[1::2]
     sitk_padded = sitk.ConstantPad(sitk_image, low, high, 0)
     tio_padded = Pad(padding, padding_mode=0)(image)
     sitk_tensor, sitk_affine = sitk_to_nib(sitk_padded)
     tio_tensor, tio_affine = sitk_to_nib(tio_padded.as_sitk())
     self.assertTensorEqual(sitk_tensor, tio_tensor)
     self.assertTensorEqual(sitk_affine, tio_affine)
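Note: the test above checks that torchio's Pad matches SimpleITK's ConstantPad. As a minimal, hedged usage sketch (the synthetic tensor below is made up for illustration and assumes a current torchio release), Pad takes six values interpreted as (w_ini, w_fin, h_ini, h_fin, d_ini, d_fin) and a padding_mode that is either a constant fill value or a NumPy padding mode such as 'symmetric':

# Minimal sketch, not part of the original test: pad a synthetic 10x10x10 volume.
import torch
import torchio as tio

image = tio.ScalarImage(tensor=torch.rand(1, 10, 10, 10))  # (channels, W, H, D)
pad = tio.Pad((1, 2, 3, 4, 5, 6), padding_mode=0)  # (w_ini, w_fin, h_ini, h_fin, d_ini, d_fin)
padded = pad(image)
print(image.spatial_shape, padded.spatial_shape)  # (10, 10, 10) -> (13, 17, 21)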
Code Example #2
 def test_transforms(self):
     landmarks_dict = dict(
         t1=np.linspace(0, 100, 13),
         t2=np.linspace(0, 100, 13),
     )
     transforms = (
         CenterCropOrPad((9, 21, 30)),
         ToCanonical(),
         Resample((1, 1.1, 1.25)),
         RandomFlip(axes=(0, 1, 2), flip_probability=1),
         RandomMotion(proportion_to_augment=1),
         RandomGhosting(proportion_to_augment=1, axes=(0, 1, 2)),
         RandomSpike(),
         RandomNoise(),
         RandomBlur(),
         RandomSwap(patch_size=2, num_iterations=5),
         Lambda(lambda x: 1.5 * x, types_to_apply=INTENSITY),
         RandomBiasField(),
         Rescale((0, 1)),
         ZNormalization(masking_method='label'),
         HistogramStandardization(landmarks_dict=landmarks_dict),
         RandomElasticDeformation(proportion_to_augment=1),
         RandomAffine(),
         Pad((1, 2, 3, 0, 5, 6)),
         Crop((3, 2, 8, 0, 1, 4)),
     )
     transformed = self.get_sample()
     for transform in transforms:
         transformed = transform(transformed)
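Note: this test targets an older torchio API; in current releases some names and arguments differ (for example, Rescale is RescaleIntensity and the random transforms take p rather than proportion_to_augment). In practice such transforms are usually chained with Compose rather than applied in a loop; a minimal sketch under those assumptions:

# Hedged sketch, not from the original test: compose a few transforms and apply them to a synthetic subject.
import torch
import torchio as tio

transform = tio.Compose([
    tio.ToCanonical(),
    tio.Resample((1, 1.1, 1.25)),
    tio.RandomFlip(axes=(0, 1, 2), flip_probability=1),
    tio.RandomNoise(),
    tio.RescaleIntensity(out_min_max=(0, 1)),
])
subject = tio.Subject(t1=tio.ScalarImage(tensor=torch.rand(1, 32, 32, 32)))
transformed = transform(subject)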
Code Example #3
def ImagesFromDataFrame(dataframe,
                        psize,
                        headers,
                        q_max_length=10,
                        q_samples_per_volume=1,
                        q_num_workers=2,
                        q_verbose=False,
                        sampler='label',
                        train=True,
                        augmentations=None,
                        preprocessing=None,
                        in_memory=False):
    # Finding the dimension of the dataframe for computational purposes later
    num_row, num_col = dataframe.shape
    # num_channels = num_col - 1 # for non-segmentation tasks, this might be different
    # changing the column indices to make it easier
    dataframe.columns = range(0, num_col)
    dataframe.index = range(0, num_row)
    # This list will later contain the list of subjects
    subjects_list = []

    channelHeaders = headers['channelHeaders']
    labelHeader = headers['labelHeader']
    predictionHeaders = headers['predictionHeaders']
    subjectIDHeader = headers['subjectIDHeader']

    sampler = sampler.lower()  # for easier parsing

    # define the control points and swap axes for augmentation
    augmentation_patchAxesPoints = copy.deepcopy(psize)
    for i in range(len(augmentation_patchAxesPoints)):
        augmentation_patchAxesPoints[i] = max(
            round(augmentation_patchAxesPoints[i] / 10),
            1)  # always at least have 1

    # iterating through the dataframe
    resizeCheck = False
    for patient in range(num_row):
        # We need this dict for storing the meta data for each subject
        # such as different image modalities, labels, any other data
        subject_dict = {}
        subject_dict['subject_id'] = dataframe[subjectIDHeader][patient]
        # iterating through the channels/modalities/timepoints of the subject
        for channel in channelHeaders:
            # assigning the dict key to the channel
            if not in_memory:
                subject_dict[str(channel)] = Image(str(
                    dataframe[channel][patient]),
                                                   type=torchio.INTENSITY)
            else:
                img = sitk.ReadImage(str(dataframe[channel][patient]))
                array = np.expand_dims(sitk.GetArrayFromImage(img), axis=0)
                subject_dict[str(channel)] = Image(
                    tensor=array,
                    type=torchio.INTENSITY,
                    path=dataframe[channel][patient])

            # if resize has been defined but resample is not (or is none)
            if not resizeCheck:
                if not (preprocessing is None) and ('resize' in preprocessing):
                    if (preprocessing['resize'] is not None):
                        resizeCheck = True
                        if not ('resample' in preprocessing):
                            preprocessing['resample'] = {}
                            if not ('resolution' in preprocessing['resample']):
                                preprocessing['resample'][
                                    'resolution'] = resize_image_resolution(
                                        subject_dict[str(channel)].as_sitk(),
                                        preprocessing['resize'])
                        else:
                            print(
                                'WARNING: \'resize\' is ignored as \'resample\' is defined under \'data_processing\', this will be skipped',
                                file=sys.stderr)
                else:
                    resizeCheck = True

        # # for regression
        # if predictionHeaders:
        #     # get the mask
        #     if (subject_dict['label'] is None) and (class_list is not None):
        #         sys.exit('The \'class_list\' parameter has been defined but a label file is not present for patient: ', patient)

        if labelHeader is not None:
            if not in_memory:
                subject_dict['label'] = Image(str(
                    dataframe[labelHeader][patient]),
                                              type=torchio.LABEL)
            else:
                img = sitk.ReadImage(str(dataframe[labelHeader][patient]))
                array = np.expand_dims(sitk.GetArrayFromImage(img), axis=0)
                subject_dict['label'] = Image(
                    tensor=array,
                    type=torchio.LABEL,
                    path=dataframe[labelHeader][patient])

            subject_dict['path_to_metadata'] = str(
                dataframe[labelHeader][patient])
        else:
            subject_dict['label'] = "NA"
            subject_dict['path_to_metadata'] = str(dataframe[channel][patient])

        # iterating through the subject's values to predict
        valueCounter = 0
        for values in predictionHeaders:
            # assigning the dict key to the channel
            subject_dict['value_' + str(valueCounter)] = np.array(
                dataframe[values][patient])
            valueCounter = valueCounter + 1

        # Initializing the subject object using the dict
        subject = Subject(subject_dict)

        # pad the image, but only for the label/weighted samplers, because we don't want to pad for the uniform sampler
        if 'label' in sampler or 'weight' in sampler:
            psize_pad = list(
                np.asarray(np.round(np.divide(psize, 2)), dtype=int))
            padder = Pad(
                psize_pad, padding_mode='symmetric'
            )  # for modes: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
            subject = padder(subject)

        # Appending this subject to the list of subjects
        subjects_list.append(subject)

    augmentation_list = []

    # first, we want to do thresholding, followed by clipping, if it is present - required for inference as well
    if not (preprocessing is None):
        if train:  # we want the crop to only happen during training
            if 'crop_external_zero_planes' in preprocessing:
                augmentation_list.append(
                    global_preprocessing_dict['crop_external_zero_planes'](
                        psize))
        for key in ['threshold', 'clip']:
            if key in preprocessing:
                augmentation_list.append(global_preprocessing_dict[key](
                    min=preprocessing[key]['min'],
                    max=preprocessing[key]['max']))

        # then, we want to do the resampling, if it is present - required for inference as well
        if 'resample' in preprocessing:
            if 'resolution' in preprocessing['resample']:
                # resample_split = str(aug).split(':')
                resample_values = tuple(
                    np.array(preprocessing['resample']['resolution']).astype(
                        float))  # np.float was removed in NumPy 1.24+; use the builtin float
                if len(resample_values) == 2:
                    resample_values = tuple(np.append(resample_values, 1))
                augmentation_list.append(Resample(resample_values))

        # next, we want to do the intensity normalization - required for inference as well
        if 'normalize' in preprocessing:
            augmentation_list.append(global_preprocessing_dict['normalize'])
        elif 'normalize_nonZero' in preprocessing:
            augmentation_list.append(
                global_preprocessing_dict['normalize_nonZero'])
        elif 'normalize_nonZero_masked' in preprocessing:
            augmentation_list.append(
                global_preprocessing_dict['normalize_nonZero_masked'])

    # other augmentations should only happen for training - and also setting the probabilities
    # for the augmentations
    if train and augmentations is not None:
        for aug in augmentations:
            if aug != 'default_probability':
                actual_function = None

                if aug == 'flip':
                    if ('axes_to_flip' in augmentations[aug]):
                        print(
                            'WARNING: \'flip\' augmentation needs the key \'axis\' instead of \'axes_to_flip\'',
                            file=sys.stderr)
                        augmentations[aug]['axis'] = augmentations[aug][
                            'axes_to_flip']
                    actual_function = global_augs_dict[aug](
                        axes=augmentations[aug]['axis'],
                        p=augmentations[aug]['probability'])
                elif aug in ['rotate_90', 'rotate_180']:
                    for axis in augmentations[aug]['axis']:
                        augmentation_list.append(global_augs_dict[aug](
                            axis=axis, p=augmentations[aug]['probability']))
                elif aug in ['swap', 'elastic']:
                    actual_function = global_augs_dict[aug](
                        patch_size=augmentation_patchAxesPoints,
                        p=augmentations[aug]['probability'])
                elif aug == 'blur':
                    actual_function = global_augs_dict[aug](
                        std=augmentations[aug]['std'],
                        p=augmentations[aug]['probability'])
                elif aug == 'noise':
                    actual_function = global_augs_dict[aug](
                        mean=augmentations[aug]['mean'],
                        std=augmentations[aug]['std'],
                        p=augmentations[aug]['probability'])
                elif aug == 'anisotropic':
                    actual_function = global_augs_dict[aug](
                        axes=augmentations[aug]['axis'],
                        downsampling=augmentations[aug]['downsampling'],
                        p=augmentations[aug]['probability'])
                else:
                    actual_function = global_augs_dict[aug](
                        p=augmentations[aug]['probability'])
                if actual_function is not None:
                    augmentation_list.append(actual_function)

    if augmentation_list:
        transform = Compose(augmentation_list)
    else:
        transform = None
    subjects_dataset = torchio.SubjectsDataset(subjects_list,
                                               transform=transform)
    if not train:
        return subjects_dataset
    if sampler in ('weighted', 'weightedsampler', 'weightedsample'):
        sampler = global_sampler_dict[sampler](psize, probability_map='label')
    else:
        sampler = global_sampler_dict[sampler](psize)
    # all of these need to be read from model.yaml
    patches_queue = torchio.Queue(subjects_dataset,
                                  max_length=q_max_length,
                                  samples_per_volume=q_samples_per_volume,
                                  sampler=sampler,
                                  num_workers=q_num_workers,
                                  shuffle_subjects=True,
                                  shuffle_patches=True,
                                  verbose=q_verbose)
    return patches_queue
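Note: a hedged sketch of how the returned torchio.Queue is typically consumed; the dataframe, psize, and headers values below are placeholders and the real ones come from the project's own CSV/config parsing:

# Hypothetical usage sketch; `dataframe` and `headers` are assumed to come from the project's CSV parsing.
from torch.utils.data import DataLoader

queue = ImagesFromDataFrame(
    dataframe,                  # pandas.DataFrame built from the data CSV
    psize=[128, 128, 128],      # patch size
    headers=headers,            # dict with channelHeaders, labelHeader, predictionHeaders, subjectIDHeader
    sampler='label',
    train=True,
)
loader = DataLoader(queue, batch_size=2, num_workers=0)  # torchio's Queue docs recommend num_workers=0 here
for batch in loader:
    images = batch[str(headers['channelHeaders'][0])]['data']  # (B, 1, pW, pH, pD)
    labels = batch['label']['data']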
Code Example #4
def ImagesFromDataFrame(
    dataframe, parameters, train, apply_zero_crop=False, loader_type=""
):
    """
    Reads the pandas dataframe and gives the dataloader to use for training/validation/testing

    Parameters
    ----------
    dataframe : pandas.DataFrame
        The main input dataframe which is calculated after splitting the data CSV
    parameters : dict
        The parameters dictionary
    train : bool
        Whether the dataloader is for training or not. For training, the patching infrastructure and data augmentation are applied.
    apply_zero_crop : bool
        If enabled, the crop_external_zero_plane is applied.
    loader_type : str
        Type of loader for printing.

    Returns
    -------
    subjects_dataset: torchio.SubjectsDataset
        This is the output for validation/testing, where patching and data augmentation are disregarded
    patches_queue: torchio.Queue
        This is the output for training, which is the subjects_dataset queue after patching and data augmentation are taken into account
    """
    # store in previous variable names
    patch_size = parameters["patch_size"]
    headers = parameters["headers"]
    q_max_length = parameters["q_max_length"]
    q_samples_per_volume = parameters["q_samples_per_volume"]
    q_num_workers = parameters["q_num_workers"]
    q_verbose = parameters["q_verbose"]
    sampler = parameters["patch_sampler"]
    augmentations = parameters["data_augmentation"]
    preprocessing = parameters["data_preprocessing"]
    in_memory = parameters["in_memory"]
    enable_padding = parameters["enable_padding"]

    # Finding the dimension of the dataframe for computational purposes later
    num_row, num_col = dataframe.shape
    # changing the column indices to make it easier
    dataframe.columns = range(0, num_col)
    dataframe.index = range(0, num_row)
    # This list will later contain the list of subjects
    subjects_list = []
    subjects_with_error = []

    channelHeaders = headers["channelHeaders"]
    labelHeader = headers["labelHeader"]
    predictionHeaders = headers["predictionHeaders"]
    subjectIDHeader = headers["subjectIDHeader"]

    # this basically means that label sampler is selected with padding
    if isinstance(sampler, dict):
        sampler_padding = sampler["label"]["padding_type"]
        sampler = "label"
    else:
        sampler = sampler.lower()  # for easier parsing
        sampler_padding = "symmetric"

    resize_images_flag = False
    # if resize has been defined but resample is not (or is none)
    if not (preprocessing is None):
        for key in preprocessing.keys():
            # check for different resizing keys
            if key in ["resize", "resize_image", "resize_images"]:
                if not (preprocessing[key] is None):
                    resize_images_flag = True
                    preprocessing["resize_image"] = preprocessing[key]
                    break

    # iterating through the dataframe
    for patient in tqdm(
        range(num_row), desc="Constructing queue for " + loader_type + " data"
    ):
        # We need this dict for storing the meta data for each subject
        # such as different image modalities, labels, any other data
        subject_dict = {}
        subject_dict["subject_id"] = str(dataframe[subjectIDHeader][patient])
        skip_subject = False
        # iterating through the channels/modalities/timepoints of the subject
        for channel in channelHeaders:
            # sanity check for malformed csv
            if not os.path.isfile(str(dataframe[channel][patient])):
                skip_subject = True

            subject_dict[str(channel)] = torchio.ScalarImage(
                dataframe[channel][patient]
            )

            # store image spacing information if not present
            if "spacing" not in subject_dict:
                file_reader = sitk.ImageFileReader()
                file_reader.SetFileName(dataframe[channel][patient])
                file_reader.ReadImageInformation()
                subject_dict["spacing"] = torch.Tensor(file_reader.GetSpacing())

            # if resize_image is requested, then perform a per-image resize with the appropriate interpolator
            if resize_images_flag:
                img_resized = resize_image(
                    subject_dict[str(channel)].as_sitk(), preprocessing["resize_image"]
                )
                # always ensure resized image spacing is used
                subject_dict["spacing"] = torch.Tensor(img_resized.GetSpacing())
                subject_dict[str(channel)] = torchio.ScalarImage.from_sitk(img_resized)

        # # for regression -- this logic needs to be thought through
        # if predictionHeaders:
        #     # get the mask
        #     if (subject_dict['label'] is None) and (class_list is not None):
        #         sys.exit('The \'class_list\' parameter has been defined but a label file is not present for patient: ', patient)

        if labelHeader is not None:
            if not os.path.isfile(str(dataframe[labelHeader][patient])):
                skip_subject = True

            subject_dict["label"] = torchio.LabelMap(dataframe[labelHeader][patient])
            subject_dict["path_to_metadata"] = str(dataframe[labelHeader][patient])

            # if resize is requested, then perform a per-image resize with the appropriate interpolator
            if resize_images_flag:
                img_resized = resize_image(
                    subject_dict["label"].as_sitk(),
                    preprocessing["resize_image"],
                    sitk.sitkNearestNeighbor,
                )
                subject_dict["label"] = torchio.LabelMap.from_sitk(img_resized)

        else:
            subject_dict["label"] = "NA"
            subject_dict["path_to_metadata"] = str(dataframe[channel][patient])

        # iterating through the subject's values to predict
        valueCounter = 0
        for values in predictionHeaders:
            # assigning the dict key to the channel
            subject_dict["value_" + str(valueCounter)] = np.array(
                dataframe[values][patient]
            )
            valueCounter += 1

        # skip the subject if the condition was tripped
        if not skip_subject:
            # Initializing the subject object using the dict
            subject = torchio.Subject(subject_dict)
            # https://github.com/fepegar/torchio/discussions/587#discussioncomment-928834
            # this is causing memory usage to explode, see https://github.com/CBICA/GaNDLF/issues/128
            if parameters["verbose"]:
                print(
                    "Checking consistency of images in subject '"
                    + subject["subject_id"]
                    + "'"
                )
            try:
                perform_sanity_check_on_subject(subject, parameters)
            except Exception as e:
                subjects_with_error.append(subject["subject_id"])

            # pad the image, but only for the label/weighted samplers, because we don't want to pad for the uniform sampler
            if "label" in sampler or "weight" in sampler:
                if enable_padding:
                    psize_pad = list(
                        np.asarray(np.ceil(np.divide(patch_size, 2)), dtype=int)
                    )
                    # for modes: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
                    padder = Pad(psize_pad, padding_mode=sampler_padding)
                    subject = padder(subject)

            # load subject into memory: https://github.com/fepegar/torchio/discussions/568#discussioncomment-859027
            if in_memory:
                subject.load()

            # Appending this subject to the list of subjects
            subjects_list.append(subject)

    if subjects_with_error:
        raise ValueError(
            "The following subjects could not be loaded, please recheck or remove and retry:",
            subjects_with_error,
        )

    transformations_list = []

    # augmentations are applied to the training set only
    if train and augmentations is not None:
        for aug in augmentations:
            aug_lower = aug.lower()
            if aug_lower in global_augs_dict:
                transformations_list.append(
                    global_augs_dict[aug_lower](augmentations[aug])
                )

    transform = get_transforms_for_preprocessing(
        parameters, transformations_list, train, apply_zero_crop
    )

    subjects_dataset = torchio.SubjectsDataset(subjects_list, transform=transform)
    if not train:
        return subjects_dataset
    if sampler in ("weighted", "weightedsampler", "weightedsample"):
        sampler = global_sampler_dict[sampler](patch_size, probability_map="label")
    else:
        sampler = global_sampler_dict[sampler](patch_size)
    # all of these need to be read from model.yaml
    patches_queue = torchio.Queue(
        subjects_dataset,
        max_length=q_max_length,
        samples_per_volume=q_samples_per_volume,
        sampler=sampler,
        num_workers=q_num_workers,
        shuffle_subjects=True,
        shuffle_patches=True,
        verbose=q_verbose,
    )
    return patches_queue
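Note: a hedged sketch of calling this newer signature; the keys in the parameters dict mirror the ones read at the top of the function, but the dataframes and all values shown here are placeholders:

# Hypothetical call sketch; dataframe_train/dataframe_val, headers, and all values are placeholders.
parameters = {
    "patch_size": [64, 64, 64],
    "headers": headers,
    "q_max_length": 10,
    "q_samples_per_volume": 1,
    "q_num_workers": 2,
    "q_verbose": False,
    "patch_sampler": "label",
    "data_augmentation": {},
    "data_preprocessing": {},
    "in_memory": False,
    "enable_padding": True,
    "verbose": False,
}
train_queue = ImagesFromDataFrame(dataframe_train, parameters, train=True, loader_type="train")
val_dataset = ImagesFromDataFrame(dataframe_val, parameters, train=False, loader_type="validation")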