def main(hdf_file: str, data_dir: str):
    keys = [FileTypes.T1, FileTypes.T2, FileTypes.GT, FileTypes.MASK,
            FileTypes.AGE, FileTypes.GPA, FileTypes.SEX]
    crawler = pymia_load.FileSystemDataCrawler(data_dir,
                                               keys,
                                               DataSetFilePathGenerator(),
                                               DirectoryFilter(),
                                               '.mha')
    subjects = [Subject(id_, file_dict) for id_, file_dict in crawler.data.items()]

    if os.path.exists(hdf_file):
        os.remove(hdf_file)

    with pymia_crt.get_writer(hdf_file) as writer:
        callbacks = pymia_crt.get_default_callbacks(writer)

        # normalize the images and unsqueeze the labels and mask.
        # Unsqueeze is needed due to the convention of having the number of channels as the last dimension,
        # i.e., here we have the shape 10 x 256 x 256 before the unsqueeze operation and 10 x 256 x 256 x 1 after it
        transform = pymia_tfm.ComposeTransform([
            pymia_tfm.IntensityNormalization(loop_axis=3, entries=('images', )),
            pymia_tfm.UnSqueeze(entries=('labels', 'mask'))
        ])

        traverser = pymia_crt.SubjectFileTraverser()
        traverser.traverse(subjects, callback=callbacks, load=LoadData(), transform=transform)

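# A minimal sketch (not part of the example above) illustrating the channels-last convention
# that the UnSqueeze transform establishes: a label volume of shape 10 x 256 x 256 gains a
# trailing channel dimension and becomes 10 x 256 x 256 x 1.
import numpy as np

labels = np.zeros((10, 256, 256))          # slices x height x width, no channel dimension
labels = np.expand_dims(labels, axis=-1)   # channels-last: shape is now (10, 256, 256, 1)
assert labels.shape == (10, 256, 256, 1)
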
def main(hdf_file: str, data_dir: str): if os.path.exists(hdf_file): raise RuntimeError( 'Dataset file "{}" does already exist'.format(hdf_file)) # let's create some sample data np.random.seed(42) # to have same sample data create_sample_data(data_dir, no_subjects=8) # collect the data collector = Collector(data_dir) subjects = collector.get_subject_files() for subject in subjects: print(subject.subject) # get the values for parametric map normalization min_, max_ = get_normalization_values(subjects, LoadData()) with pymia_crt.Hdf5Writer(hdf_file) as writer: callbacks = pymia_crt.get_default_callbacks(writer) callbacks.callbacks.append( WriteNormalizationCallback(writer, min_, max_)) transform = pymia_tfm.ComposeTransform([ tfm.MRFMaskedLabelNormalization(min_, max_, data.ID_MASK_FG), pymia_tfm.IntensityNormalization(loop_axis=4, entries=(pymia_def.KEY_IMAGES, )), ]) traverser = pymia_crt.SubjectFileTraverser() traverser.traverse(subjects, callback=callbacks, load=LoadData(), transform=transform, concat_fn=concat)
def main(hdf_file: str, data_dir: str):
    keys = [FileTypes.T1, FileTypes.T2, FileTypes.GT, FileTypes.MASK,
            FileTypes.AGE, FileTypes.GPA, FileTypes.SEX]
    crawler = pymia_load.FileSystemDataCrawler(data_dir,
                                               keys,
                                               DataSetFilePathGenerator(),
                                               DirectoryFilter(),
                                               '.mha')
    subjects = [Subject(id_, file_dict) for id_, file_dict in crawler.data.items()]

    if os.path.exists(hdf_file):
        os.remove(hdf_file)

    with pymia_crt.get_writer(hdf_file) as writer:
        callbacks = pymia_crt.get_default_callbacks(writer)

        # add a transform to normalize the images
        transform = pymia_tfm.IntensityNormalization(loop_axis=3, entries=('images', ))

        traverser = pymia_crt.SubjectFileTraverser()
        traverser.traverse(subjects, callback=callbacks, load=LoadData(), transform=transform)

def get_params(out_file: str,
               in_dir=dirs.BRATS_ORIG_DATA_DIR,
               split_file=os.path.join(dirs.SPLITS_DIR, 'split_brats18_100-25-160.json'),
               is_train_data=True,
               prediction_path=None,
               label_fn=None,
               add_prediction_fn=None):
    if add_prediction_fn is None:
        add_prediction_fn = add_predictions

    params = BuildParameters(in_dir=in_dir,
                             out_file=out_file,
                             transforms=[tfm.IntensityNormalization(loop_axis=-1)],
                             split_file=split_file,
                             is_train_data=is_train_data,
                             prediction_path=prediction_path,
                             add_prediction_fn=add_prediction_fn)

    if label_fn is not None:
        params.transforms.append(tfm.LambdaTransform(label_fn, entries=('labels', )))

    return params

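# Hypothetical usage sketch for get_params: the output file name and the binarizing
# label_fn below are illustrative assumptions, not values taken from the original code.
import numpy as np

train_params = get_params('brats18_train.h5',
                          label_fn=lambda labels: (labels > 0).astype(np.uint8))  # collapse label classes to binary
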
def main(hdf_file: str, data_dir: str, meta: bool):
    # collect the files for each subject
    subjects = get_subject_files(data_dir)

    # remove the "old" dataset if it exists
    if os.path.exists(hdf_file):
        os.remove(hdf_file)

    with crt.get_writer(hdf_file) as writer:
        # initialize the callbacks that will actually write the data to the dataset file
        callbacks = crt.get_default_callbacks(writer, meta_only=meta)

        # add a transform to normalize the images
        transform = tfm.IntensityNormalization(loop_axis=3, entries=(defs.KEY_IMAGES, ))

        # run through the subject files (loads them, applies transformations, and calls the callback for writing them)
        traverser = crt.Traverser()
        traverser.traverse(subjects, callback=callbacks, load=LoadData(), transform=transform)

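# A possible command-line entry point for the main function above (a sketch; the argument
# names, default paths, and help texts are assumptions, not part of the original example).
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Dataset creation')
    parser.add_argument('--hdf_file', type=str, default='out/example-dataset.h5',
                        help='Path of the dataset file to create.')
    parser.add_argument('--data_dir', type=str, default='data',
                        help='Path to the directory containing the subject data.')
    parser.add_argument('--meta', action='store_true',
                        help='Write only metadata (file paths) instead of the image data itself.')
    args = parser.parse_args()
    main(args.hdf_file, args.data_dir, args.meta)
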
def main(hdf_file: str, data_dir: str): if os.path.exists(hdf_file): raise RuntimeError('Dataset file "{}" does exist already'.format(hdf_file)) # we threshold I_Q at probability 0.1 probability_threshold = 0.1 # we use image information extracted from 5^3 neighborhood around each point spatial_size = 5 # let's create some sample data np.random.seed(42) # to have same sample data create_sample_data(data_dir, no_subjects=8) # collect the data collector = Collector(data_dir) subjects = collector.get_subject_files() for subject in subjects: print(subject.subject) print('Total of {} subjects'.format(len(subjects))) os.makedirs(os.path.dirname(hdf_file), exist_ok=True) with pymia_crt.get_writer(hdf_file) as writer: callbacks = pymia_crt.get_default_callbacks(writer) transform = pymia_tfm.ComposeTransform([ pymia_tfm.LambdaTransform(lambda_fn=lambda np_data: np_data.astype(np.float32), entries=('images', data.KEY_IMAGE_INFORMATION, )), pymia_tfm.LambdaTransform(loop_axis=1, entries=('images', ), lambda_fn=normalize_unit_cube), pymia_tfm.IntensityNormalization(loop_axis=-1, entries=(data.KEY_IMAGE_INFORMATION,)) ]) traverser = pymia_crt.SubjectFileTraverser() traverser.traverse(subjects, callback=callbacks, load=LoadData(probability_threshold, spatial_size), transform=transform, concat_fn=concat)