def test_pad(self):
    """Compare ``Pad`` (``padding_mode=0``) against ``sitk.ConstantPad``.

    Both results are converted with ``sitk_to_nib`` and their tensors and
    affines are required to be equal.
    """
    source_image = self.sample_subject.t1
    padding = 1, 2, 3, 4, 5, 6
    reference_input = source_image.as_sitk()
    # Even positions go to ConstantPad as its lower-bound argument, odd
    # positions as its upper-bound argument.
    lower_bounds = padding[::2]
    upper_bounds = padding[1::2]
    reference_padded = sitk.ConstantPad(
        reference_input, lower_bounds, upper_bounds, 0)
    candidate_padded = Pad(padding, padding_mode=0)(source_image)
    reference_tensor, reference_affine = sitk_to_nib(reference_padded)
    candidate_tensor, candidate_affine = sitk_to_nib(
        candidate_padded.as_sitk())
    self.assertTensorEqual(reference_tensor, candidate_tensor)
    self.assertTensorEqual(reference_affine, candidate_affine)
def test_transforms(self):
    """Apply a fixed chain of transforms to a sample, one after another.

    Each transform receives the output of the previous one; the test only
    requires that the whole chain runs without raising.
    """
    # Separate landmark arrays for 't1' and 't2', as many as needed by
    # HistogramStandardization below.
    landmarks_dict = {
        name: np.linspace(0, 100, 13) for name in ('t1', 't2')
    }
    pipeline = (
        CenterCropOrPad((9, 21, 30)),
        ToCanonical(),
        Resample((1, 1.1, 1.25)),
        RandomFlip(axes=(0, 1, 2), flip_probability=1),
        RandomMotion(proportion_to_augment=1),
        RandomGhosting(proportion_to_augment=1, axes=(0, 1, 2)),
        RandomSpike(),
        RandomNoise(),
        RandomBlur(),
        RandomSwap(patch_size=2, num_iterations=5),
        Lambda(lambda x: 1.5 * x, types_to_apply=INTENSITY),
        RandomBiasField(),
        Rescale((0, 1)),
        ZNormalization(masking_method='label'),
        HistogramStandardization(landmarks_dict=landmarks_dict),
        RandomElasticDeformation(proportion_to_augment=1),
        RandomAffine(),
        Pad((1, 2, 3, 0, 5, 6)),
        Crop((3, 2, 8, 0, 1, 4)),
    )
    sample = self.get_sample()
    for step in pipeline:
        sample = step(sample)
def ImagesFromDataFrame(dataframe, psize, headers, q_max_length=10,
                        q_samples_per_volume=1, q_num_workers=2,
                        q_verbose=False, sampler='label', train=True,
                        augmentations=None, preprocessing=None,
                        in_memory=False):
    """
    Build the torchio dataset (or patch queue) for the subjects in a dataframe.

    Parameters
    ----------
    dataframe : table with ``shape``, ``columns`` and ``index`` attributes
        One row per subject (presumably a pandas.DataFrame - confirm against
        callers). Its columns and index are renumbered IN PLACE to
        0..N-1 so that the integer headers below can be used for lookup.
    psize : sequence of numbers
        Patch size. Used for the sampler, for padding (half the patch size,
        rounded) and to derive the ``patch_size`` handed to the 'swap' and
        'elastic' augmentations (a tenth of the patch size, at least 1).
    headers : dict
        Must provide 'channelHeaders', 'labelHeader', 'predictionHeaders'
        and 'subjectIDHeader'.
    q_max_length, q_samples_per_volume, q_num_workers, q_verbose
        Forwarded to ``torchio.Queue``.
    sampler : str
        Key into ``global_sampler_dict`` (lower-cased before use). Subjects
        are padded when the name contains 'label' or 'weight'.
    train : bool
        When False the plain ``torchio.SubjectsDataset`` is returned and
        neither cropping of zero planes nor augmentations are added.
    augmentations : dict or None
        Maps augmentation name to its settings; the key
        'default_probability' is skipped. The legacy 'axes_to_flip' key of
        'flip' is copied to 'axis' IN PLACE.
    preprocessing : dict or None
        Preprocessing settings. When 'resize' is set and 'resample' is
        absent, a 'resample' entry is ADDED to this dict in place.
    in_memory : bool
        When True images are read with SimpleITK and passed as tensors
        instead of paths.

    Returns
    -------
    torchio.SubjectsDataset
        When ``train`` is False.
    torchio.Queue
        When ``train`` is True.
    """
    # Finding the dimension of the dataframe for computational purposes later
    num_row, num_col = dataframe.shape
    # changing the column indices to make it easier
    dataframe.columns = range(0, num_col)
    dataframe.index = range(0, num_row)

    channelHeaders = headers['channelHeaders']
    labelHeader = headers['labelHeader']
    predictionHeaders = headers['predictionHeaders']
    subjectIDHeader = headers['subjectIDHeader']

    sampler = sampler.lower()  # for easier parsing

    # control points / swap patch size for augmentation: a tenth of the patch
    # size along each axis, but always at least 1
    augmentation_patchAxesPoints = [
        max(int(round(axis_size / 10)), 1) for axis_size in psize
    ]

    # padding is only wanted for label/weighted samplers, not for uniform;
    # the amount does not depend on the subject, so compute it once
    pad_subjects = 'label' in sampler or 'weight' in sampler
    if pad_subjects:
        psize_pad = list(
            np.asarray(np.round(np.divide(psize, 2)), dtype=int))

    # This list will later contain the list of subjects
    subjects_list = []
    resize_checked = False

    # iterating through the dataframe
    for patient in range(num_row):
        # meta data for each subject: image modalities, label, other values
        subject_dict = {'subject_id': dataframe[subjectIDHeader][patient]}

        # iterating through the channels/modalities/timepoints of the subject
        for channel in channelHeaders:
            channel_key = str(channel)
            if not in_memory:
                subject_dict[channel_key] = Image(
                    str(dataframe[channel][patient]),
                    type=torchio.INTENSITY)
            else:
                img = sitk.ReadImage(str(dataframe[channel][patient]))
                array = np.expand_dims(sitk.GetArrayFromImage(img), axis=0)
                subject_dict[channel_key] = Image(
                    tensor=array, type=torchio.INTENSITY,
                    path=dataframe[channel][patient])

            # if resize has been defined but resample is not: derive the
            # resample resolution from the first image that is seen
            if not resize_checked:
                if preprocessing is not None and 'resize' in preprocessing:
                    if preprocessing['resize'] is not None:
                        resize_checked = True
                        if 'resample' not in preprocessing:
                            preprocessing['resample'] = {}
                            preprocessing['resample']['resolution'] = \
                                resize_image_resolution(
                                    subject_dict[channel_key].as_sitk(),
                                    preprocessing['resize'])
                        else:
                            print(
                                'WARNING: \'resize\' is ignored as \'resample\' is defined under \'data_processing\', this will be skipped',
                                file=sys.stderr)
                else:
                    resize_checked = True

        if labelHeader is not None:
            if not in_memory:
                subject_dict['label'] = Image(
                    str(dataframe[labelHeader][patient]),
                    type=torchio.LABEL)
            else:
                img = sitk.ReadImage(str(dataframe[labelHeader][patient]))
                array = np.expand_dims(sitk.GetArrayFromImage(img), axis=0)
                subject_dict['label'] = Image(
                    tensor=array, type=torchio.LABEL,
                    path=dataframe[labelHeader][patient])
            subject_dict['path_to_metadata'] = str(
                dataframe[labelHeader][patient])
        else:
            subject_dict['label'] = "NA"
            # no label available: 'channel' still holds the last entry of
            # channelHeaders, whose file provides the metadata path
            subject_dict['path_to_metadata'] = str(
                dataframe[channel][patient])

        # values to predict are stored under 'value_0', 'value_1', ...
        for value_index, value_header in enumerate(predictionHeaders):
            subject_dict['value_' + str(value_index)] = np.array(
                dataframe[value_header][patient])

        # Initializing the subject object using the dict
        subject = Subject(subject_dict)

        if pad_subjects:
            # for modes: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
            padder = Pad(psize_pad, padding_mode='symmetric')
            subject = padder(subject)

        subjects_list.append(subject)

    augmentation_list = []

    # preprocessing is required for inference as well
    if preprocessing is not None:
        # we want the crop to only happen during training
        if train and 'crop_external_zero_planes' in preprocessing:
            augmentation_list.append(
                global_preprocessing_dict['crop_external_zero_planes'](
                    psize))

        # thresholding first, followed by clipping
        for key in ['threshold', 'clip']:
            if key in preprocessing:
                augmentation_list.append(global_preprocessing_dict[key](
                    min=preprocessing[key]['min'],
                    max=preprocessing[key]['max']))

        # then the resampling, if it is present
        if 'resample' in preprocessing:
            if 'resolution' in preprocessing['resample']:
                # builtin float: the np.float alias no longer exists in
                # current NumPy releases
                resample_values = tuple(
                    np.array(
                        preprocessing['resample']['resolution']
                    ).astype(float))
                if len(resample_values) == 2:
                    # extend a two-value resolution with a third value of 1
                    resample_values = tuple(np.append(resample_values, 1))
                augmentation_list.append(Resample(resample_values))

        # next, the intensity normalization (only the first matching key)
        if 'normalize' in preprocessing:
            augmentation_list.append(global_preprocessing_dict['normalize'])
        elif 'normalize_nonZero' in preprocessing:
            augmentation_list.append(
                global_preprocessing_dict['normalize_nonZero'])
        elif 'normalize_nonZero_masked' in preprocessing:
            augmentation_list.append(
                global_preprocessing_dict['normalize_nonZero_masked'])

    # other augmentations should only happen for training
    if train and augmentations is not None:
        for aug in augmentations:
            if aug == 'default_probability':
                continue
            settings = augmentations[aug]
            actual_function = None

            if aug == 'flip':
                if 'axes_to_flip' in settings:
                    print(
                        'WARNING: \'flip\' augmentation needs the key \'axis\' instead of \'axes_to_flip\'',
                        file=sys.stderr)
                    settings['axis'] = settings['axes_to_flip']
                actual_function = global_augs_dict[aug](
                    axes=settings['axis'], p=settings['probability'])
            elif aug in ['rotate_90', 'rotate_180']:
                # one transform per requested axis, appended directly
                for axis in settings['axis']:
                    augmentation_list.append(global_augs_dict[aug](
                        axis=axis, p=settings['probability']))
            elif aug in ['swap', 'elastic']:
                actual_function = global_augs_dict[aug](
                    patch_size=augmentation_patchAxesPoints,
                    p=settings['probability'])
            elif aug == 'blur':
                actual_function = global_augs_dict[aug](
                    std=settings['std'], p=settings['probability'])
            elif aug == 'noise':
                actual_function = global_augs_dict[aug](
                    mean=settings['mean'], std=settings['std'],
                    p=settings['probability'])
            elif aug == 'anisotropic':
                actual_function = global_augs_dict[aug](
                    axes=settings['axis'],
                    downsampling=settings['downsampling'],
                    p=settings['probability'])
            else:
                actual_function = global_augs_dict[aug](
                    p=settings['probability'])

            if actual_function is not None:
                augmentation_list.append(actual_function)

    transform = Compose(augmentation_list) if augmentation_list else None

    subjects_dataset = torchio.SubjectsDataset(subjects_list,
                                               transform=transform)

    if not train:
        return subjects_dataset

    if sampler in ('weighted', 'weightedsampler', 'weightedsample'):
        sampler = global_sampler_dict[sampler](psize,
                                               probability_map='label')
    else:
        sampler = global_sampler_dict[sampler](psize)

    # all of these need to be read from model.yaml
    patches_queue = torchio.Queue(subjects_dataset,
                                  max_length=q_max_length,
                                  samples_per_volume=q_samples_per_volume,
                                  sampler=sampler,
                                  num_workers=q_num_workers,
                                  shuffle_subjects=True,
                                  shuffle_patches=True,
                                  verbose=q_verbose)
    return patches_queue
def ImagesFromDataFrame(
    dataframe, parameters, train, apply_zero_crop=False, loader_type=""
):
    """
    Reads the pandas dataframe and gives the dataloader to use for training/validation/testing

    Parameters
    ----------
    dataframe : pandas.DataFrame
        The main input dataframe which is calculated after splitting the data CSV.
        Its columns and index are renumbered in place to 0..N-1.
    parameters : dict
        The parameters dictionary. The keys read here are "patch_size",
        "headers", "q_max_length", "q_samples_per_volume", "q_num_workers",
        "q_verbose", "patch_sampler", "data_augmentation",
        "data_preprocessing", "in_memory", "enable_padding" and "verbose".
        When a resize key is set in "data_preprocessing", its value is also
        stored under "resize_image" in that same dict.
    train : bool
        If the dataloader is for training or not. For training, the patching infrastructure and data augmentation is applied.
    apply_zero_crop : bool
        Forwarded to get_transforms_for_preprocessing. If enabled, the crop_external_zero_plane is applied.
    loader_type : str
        Type of loader for printing.

    Returns
    -------
    subjects_dataset: torchio.SubjectsDataset
        This is the output for validation/testing, where patching and data augmentation is disregarded
    patches_queue: torchio.Queue
        This is the output for training, which is the subjects_dataset queue after patching and data augmentation is taken into account

    Raises
    ------
    ValueError
        If the sanity check failed for at least one subject; the second
        argument of the exception is the list of affected subject ids.
    """
    # store in previous variable names
    patch_size = parameters["patch_size"]
    headers = parameters["headers"]
    q_max_length = parameters["q_max_length"]
    q_samples_per_volume = parameters["q_samples_per_volume"]
    q_num_workers = parameters["q_num_workers"]
    q_verbose = parameters["q_verbose"]
    sampler = parameters["patch_sampler"]
    augmentations = parameters["data_augmentation"]
    preprocessing = parameters["data_preprocessing"]
    in_memory = parameters["in_memory"]
    enable_padding = parameters["enable_padding"]

    # Finding the dimension of the dataframe for computational purposes later
    num_row, num_col = dataframe.shape
    # changing the column indices to make it easier
    dataframe.columns = range(0, num_col)
    dataframe.index = range(0, num_row)
    # This list will later contain the list of subjects
    subjects_list = []
    subjects_with_error = []

    channelHeaders = headers["channelHeaders"]
    labelHeader = headers["labelHeader"]
    predictionHeaders = headers["predictionHeaders"]
    subjectIDHeader = headers["subjectIDHeader"]

    # a dict basically means that label sampler is selected with padding
    if isinstance(sampler, dict):
        sampler_padding = sampler["label"]["padding_type"]
        sampler = "label"
    else:
        sampler = sampler.lower()  # for easier parsing
        sampler_padding = "symmetric"

    resize_images_flag = False
    # the first resize key (in dict order) with a non-None value wins
    if preprocessing is not None:
        for key in preprocessing:
            if key in ["resize", "resize_image", "resize_images"]:
                if preprocessing[key] is not None:
                    resize_images_flag = True
                    preprocessing["resize_image"] = preprocessing[key]
                    # must stop here: the dict may just have grown
                    break

    # padding image, but only for label sampler, because we don't want to
    # pad for uniform; the amount is the same for every subject
    pad_subjects = ("label" in sampler or "weight" in sampler) and enable_padding
    if pad_subjects:
        psize_pad = list(
            np.asarray(np.ceil(np.divide(patch_size, 2)), dtype=int)
        )

    # iterating through the dataframe
    for patient in tqdm(
        range(num_row), desc="Constructing queue for " + loader_type + " data"
    ):
        # sanity check for malformed csv: a subject with any missing file is
        # skipped BEFORE anything is read, so that no image object is ever
        # built from (or resized from) a path that does not exist
        required_files = [
            str(dataframe[channel][patient]) for channel in channelHeaders
        ]
        if labelHeader is not None:
            required_files.append(str(dataframe[labelHeader][patient]))
        if not all(os.path.isfile(file_path) for file_path in required_files):
            continue

        # We need this dict for storing the meta data for each subject
        # such as different image modalities, labels, any other data
        subject_dict = {}
        subject_dict["subject_id"] = str(dataframe[subjectIDHeader][patient])

        # iterating through the channels/modalities/timepoints of the subject
        for channel in channelHeaders:
            channel_key = str(channel)
            subject_dict[channel_key] = torchio.ScalarImage(
                dataframe[channel][patient]
            )

            # store image spacing information if not present; only the image
            # header is read for this
            if "spacing" not in subject_dict:
                file_reader = sitk.ImageFileReader()
                file_reader.SetFileName(dataframe[channel][patient])
                file_reader.ReadImageInformation()
                subject_dict["spacing"] = torch.Tensor(file_reader.GetSpacing())

            # if resize_image is requested, perform the per-image resize
            if resize_images_flag:
                img_resized = resize_image(
                    subject_dict[channel_key].as_sitk(),
                    preprocessing["resize_image"],
                )
                # always ensure resized image spacing is used
                subject_dict["spacing"] = torch.Tensor(img_resized.GetSpacing())
                subject_dict[channel_key] = torchio.ScalarImage.from_sitk(
                    img_resized
                )

        # NOTE: regression-specific label checks are not implemented here
        if labelHeader is not None:
            subject_dict["label"] = torchio.LabelMap(dataframe[labelHeader][patient])
            subject_dict["path_to_metadata"] = str(dataframe[labelHeader][patient])

            # labels are resized with nearest-neighbor interpolation
            if resize_images_flag:
                img_resized = resize_image(
                    subject_dict["label"].as_sitk(),
                    preprocessing["resize_image"],
                    sitk.sitkNearestNeighbor,
                )
                subject_dict["label"] = torchio.LabelMap.from_sitk(img_resized)
        else:
            subject_dict["label"] = "NA"
            # no label available: 'channel' still holds the last entry of
            # channelHeaders, whose file provides the metadata path
            subject_dict["path_to_metadata"] = str(dataframe[channel][patient])

        # values to predict are stored under "value_0", "value_1", ...
        for value_index, value_header in enumerate(predictionHeaders):
            subject_dict["value_" + str(value_index)] = np.array(
                dataframe[value_header][patient]
            )

        # Initializing the subject object using the dict
        subject = torchio.Subject(subject_dict)

        # https://github.com/fepegar/torchio/discussions/587#discussioncomment-928834
        # this is causing memory usage to explode, see https://github.com/CBICA/GaNDLF/issues/128
        if parameters["verbose"]:
            print(
                "Checking consistency of images in subject '"
                + subject["subject_id"]
                + "'"
            )
        try:
            perform_sanity_check_on_subject(subject, parameters)
        except Exception as e:
            # collect every failing subject so that all of them can be
            # reported together in the ValueError raised after the loop
            subjects_with_error.append(subject["subject_id"])
            print(
                "Subject '"
                + subject["subject_id"]
                + "' failed the sanity check: "
                + type(e).__name__
                + ": "
                + str(e)
            )
            # the function is going to raise anyway, so do not pad or load
            # a subject that is known to be broken
            continue

        if pad_subjects:
            # for modes: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
            padder = Pad(psize_pad, padding_mode=sampler_padding)
            subject = padder(subject)

        # load subject into memory: https://github.com/fepegar/torchio/discussions/568#discussioncomment-859027
        if in_memory:
            subject.load()

        # Appending this subject to the list of subjects
        subjects_list.append(subject)

    if subjects_with_error:
        raise ValueError(
            "The following subjects could not be loaded, please recheck or remove and retry:",
            subjects_with_error,
        )

    transformations_list = []

    # augmentations are applied to the training set only; names that are not
    # present in global_augs_dict are ignored
    if train and augmentations is not None:
        for aug in augmentations:
            aug_lower = aug.lower()
            if aug_lower in global_augs_dict:
                transformations_list.append(
                    global_augs_dict[aug_lower](augmentations[aug])
                )

    transform = get_transforms_for_preprocessing(
        parameters, transformations_list, train, apply_zero_crop
    )

    subjects_dataset = torchio.SubjectsDataset(subjects_list, transform=transform)
    if not train:
        return subjects_dataset

    if sampler in ("weighted", "weightedsampler", "weightedsample"):
        sampler = global_sampler_dict[sampler](patch_size, probability_map="label")
    else:
        sampler = global_sampler_dict[sampler](patch_size)

    # all of these need to be read from model.yaml
    patches_queue = torchio.Queue(
        subjects_dataset,
        max_length=q_max_length,
        samples_per_volume=q_samples_per_volume,
        sampler=sampler,
        num_workers=q_num_workers,
        shuffle_subjects=True,
        shuffle_patches=True,
        verbose=q_verbose,
    )
    return patches_queue