def __init__(self, bids_df, subject_file_lst, target_suffix, contrast_lst, path_hdf5, contrast_balance=None,
             slice_axis=2, metadata_choice=False, slice_filter_fn=None, roi_params=None, transform=None,
             object_detection_params=None, soft_gt=False):
    """Select the subjects to convert and write them, with their metadata, to an HDF5 file.

    Args:
        bids_df: Object exposing a ``df`` dataframe (the ``filename``, ``participant_id``, ``suffix`` and
            ``path`` columns are read here) as well as ``get_subject_fnames()`` and ``get_deriv_fnames()``.
        subject_file_lst (list): Filenames of the subjects to convert. If no row of ``bids_df.df`` matches
            them as filenames, they are interpreted as participant ids instead.
        target_suffix (list): Forwarded to ``self.process_subject``.
        contrast_lst (list): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        path_hdf5 (str): Path of the HDF5 file; it is opened in write mode.
        contrast_balance (dict): Mapping whose keys are the contrasts to balance. ``None`` is treated as an
            empty mapping. Forwarded to ``self.process_subject``.
        slice_axis (int): Stored on the instance, forwarded to the bounding-box loader and written to the
            HDF5 attributes.
        metadata_choice (str or bool): When equal to ``'mri_params'``, ``self.metadata`` is initialised with
            empty lists for the MRI parameters. Also written to the HDF5 attributes.
        slice_filter_fn: Stored on the instance. NOTE: the ``slice_filter_fn`` HDF5 attribute is written with
            a fixed value, independent of this argument.
        roi_params (dict): Forwarded unchanged to ``self.process_subject``.
        transform: Pair unpacked into ``self.prepro_transforms`` and ``self.transform``. Despite the ``None``
            default, a 2-item iterable is required because the value is unpacked.
        object_detection_params (dict): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        soft_gt (bool): Stored on the instance.
    """
    print("Starting conversion")

    # The default used to crash on ``.keys()``; an empty mapping simply yields empty counters.
    if contrast_balance is None:
        contrast_balance = {}

    # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst
    subject_file_lst = sorted(subject_file_lst)
    df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)]
    # Backward compatibility for subject_file_lst containing participant_ids instead of filenames
    if df_subjects.empty:
        df_subjects = bids_df.df[bids_df.df['participant_id'].isin(subject_file_lst)]
        subject_file_lst = sorted(df_subjects['filename'].to_list())

    self.soft_gt = soft_gt
    self.dt = h5py.special_dtype(vlen=str)
    self.path_hdf5 = path_hdf5
    list_patients = []

    self.filename_pairs = []

    self.metadata = {}
    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                         "EchoTime": [], "Manufacturer": []}

    self.prepro_transforms, self.transform = transform

    # Number of selected subjects for each contrast of contrast_balance. Summing the boolean match mask gives 0
    # for a contrast without any subject, where ``value_counts()[True]`` raised a KeyError.
    tot = {contrast: df_subjects['suffix'].str.fullmatch(contrast, na=False).sum()
           for contrast in contrast_balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in contrast_balance}

    # Get all subjects path from bids_df for bounding box
    get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
        '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

    # Load bounding box from list of path
    self.has_bounding_box = True
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            get_all_subj_path,
                                                            slice_axis,
                                                            contrast_lst)

    # Get all derivatives filenames from bids_df
    all_deriv = bids_df.get_deriv_fnames()

    for subject in tqdm(subject_file_lst, desc="Loading dataset"):
        self.process_subject(bids_df, subject, df_subjects, c, tot, contrast_balance, target_suffix, all_deriv,
                             roi_params, bounding_box_dict, metadata_choice, list_patients)

    self.slice_axis = slice_axis
    self.slice_filter_fn = slice_filter_fn

    # Update HDF5 metadata
    with h5py.File(self.path_hdf5, "w") as hdf5_file:
        hdf5_file.attrs.create('patients_id', list(set(list_patients)), dtype=self.dt)
        hdf5_file.attrs['slice_axis'] = slice_axis
        hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True), ('filter_empty_mask', False)]
        hdf5_file.attrs['metadata_choice'] = metadata_choice

    # Save images into HDF5 file
    self._load_filenames()
    print("Files loaded.")
def __init__(self, root_dir, subject_lst, target_suffix, contrast_params, slice_axis=2, cache=True,
             transform=None, metadata_choice=False, slice_filter_fn=None, roi_params=None, multichannel=False,
             object_detection_params=None, task="segmentation", soft_gt=False):
    """Build the list of (inputs, targets, ROI, metadata) tuples from a BIDS folder and initialise the parent.

    Args:
        root_dir (str): Path handed to ``bids.BIDS``.
        subject_lst (list): Subject ids to keep; a subject is kept when ``record["subject_id"]`` is in it.
        target_suffix (list): Suffixes of the target derivatives. A derivative is a target when its name ends
            with ``<modality><suffix>.nii.gz``.
        contrast_params (dict): Must provide ``"contrast_lst"`` (contrasts to load) and ``"balance"`` (mapping
            contrast -> maximal fraction of that contrast's subjects to go through selection).
        slice_axis (int): Forwarded to the bounding-box loader and to the parent constructor.
        cache (bool): Forwarded to the parent constructor.
        transform: Forwarded to the parent constructor.
        metadata_choice (str or bool): ``'mri_params'`` filters subjects with ``imed_film.check_isMRIparam``;
            ``'contrasts'`` or a falsy value adds nothing; any other value is read as a column of
            participants.tsv.
        slice_filter_fn: Forwarded to the parent constructor.
        roi_params (dict): ROI settings; ``None`` is replaced by a dict with ``suffix`` and
            ``slice_filter_roi`` set to ``None``.
        multichannel (bool): If True, the contrasts of a subject are grouped into a single tuple, and only
            subjects for which every contrast of ``contrast_lst`` was found are kept.
        object_detection_params (dict): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        task (str): Forwarded to the parent constructor.
        soft_gt (bool): Stored on the instance and forwarded to the parent constructor.

    Raises:
        ValueError: If ``metadata_choice`` names a column that is absent from participants.tsv.
    """
    self.bids_ds = bids.BIDS(root_dir)
    self.roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}
    self.soft_gt = soft_gt
    self.filename_pairs = []
    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                         "EchoTime": [], "Manufacturer": []}

    contrast_lst = contrast_params["contrast_lst"]
    balance = contrast_params["balance"]
    roi_suffix = self.roi_params["suffix"]

    bids_subjects = [s for s in self.bids_ds.get_subjects() if s.record["subject_id"] in subject_lst]

    # Create a dictionary with the number of subjects for each contrast of contrast_balance
    tot = {contrast: len([s for s in bids_subjects if s.record["modality"] == contrast])
           for contrast in balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in balance}

    multichannel_subjects = {}
    if multichannel:
        num_contrast = len(contrast_lst)
        # Channel position of each contrast
        idx_dict = {contrast: idx for idx, contrast in enumerate(contrast_lst)}
        multichannel_subjects = {subject: {"absolute_paths": [None] * num_contrast,
                                           "deriv_path": None,
                                           "roi_filename": None,
                                           "metadata": [None] * num_contrast} for subject in subject_lst}

    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            self.bids_ds.get_subjects(),
                                                            slice_axis,
                                                            contrast_lst)

    # participants.tsv content, loaded at most once (on first use) instead of re-parsing the whole BIDS
    # dataset for every subject.
    participants_df = None

    for subject in tqdm(bids_subjects, desc="Loading dataset"):
        modality = subject.record["modality"]
        if modality not in contrast_lst:
            continue

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        if modality in balance:
            c[modality] = c[modality] + 1
            if c[modality] / tot[modality] > balance[modality]:
                continue

        if not subject.has_derivative("labels"):
            print("Subject without derivative, skipping.")
            continue
        derivatives = subject.get_derivatives("labels")
        target_filename, roi_filename = [None] * len(target_suffix), None

        for deriv in derivatives:
            for idx, suffix in enumerate(target_suffix):
                if deriv.endswith(modality + suffix + ".nii.gz"):
                    target_filename[idx] = deriv

            if roi_suffix is not None and deriv.endswith(modality + roi_suffix + ".nii.gz"):
                roi_filename = [deriv]

        # Skip subjects without any target, or without ROI when one is requested
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        metadata = subject.metadata() if subject.has_metadata() else {}

        # add contrast to metadata
        metadata['contrast'] = modality

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(subject.record["absolute_path"])][0]

        if metadata_choice == 'mri_params':
            # A list (not a generator) is built on purpose so that check_isMRIparam is called for every key.
            if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                        for m in self.metadata.keys()]):
                continue

        elif metadata_choice and metadata_choice != 'contrasts':
            # add custom data to metadata
            subject_id = subject.record["subject_id"]
            if participants_df is None:
                participants_df = self.bids_ds.participants.content
            if metadata_choice not in participants_df.columns:
                raise ValueError("The following metadata cannot be found in participants.tsv file: {}. "
                                 "Invalid metadata choice.".format(metadata_choice))

            metadata[metadata_choice] = participants_df[
                participants_df['participant_id'] == subject_id][metadata_choice].values[0]

            # Create metadata dict for OHE (a fresh dict per subject, as before)
            data_lst = sorted(set(participants_df[metadata_choice].values))
            metadata['metadata_dict'] = {data: idx for idx, data in enumerate(data_lst)}

        # Fill multichannel dictionary
        if multichannel:
            idx = idx_dict[modality]
            subj_id = subject.record["subject_id"]
            multichannel_subjects[subj_id]["absolute_paths"][idx] = subject.record.absolute_path
            multichannel_subjects[subj_id]["deriv_path"] = target_filename
            multichannel_subjects[subj_id]["metadata"][idx] = metadata
            if roi_filename:
                multichannel_subjects[subj_id]["roi_filename"] = roi_filename
        else:
            self.filename_pairs.append(([subject.record.absolute_path],
                                        target_filename, roi_filename, [metadata]))

    if multichannel:
        # Keep only the subjects for which every requested contrast was found
        for subject in multichannel_subjects.values():
            if None not in subject["absolute_paths"]:
                self.filename_pairs.append((subject["absolute_paths"], subject["deriv_path"],
                                            subject["roi_filename"], subject["metadata"]))

    super().__init__(self.filename_pairs, slice_axis, cache, transform, slice_filter_fn, task, self.roi_params,
                     self.soft_gt)
def __init__(self, bids_df, subject_file_lst, target_suffix, contrast_lst, path_hdf5, contrast_balance=None,
             slice_axis=2, metadata_choice=False, slice_filter_fn=None, roi_params=None, transform=None,
             object_detection_params=None, soft_gt=False):
    """Select the subjects to convert and write them, with their metadata, to an HDF5 file.

    Each retained subject contributes a tuple
    ``(subj_id, [image path], target_filename, roi_filename, [metadata])`` to ``self.filename_pairs``.

    Args:
        bids_df: Object exposing a ``df`` dataframe (the ``filename``, ``participant_id``, ``suffix`` and
            ``path`` columns are read here) as well as ``get_subject_fnames()``, ``get_deriv_fnames()`` and
            ``get_derivatives(subject, all_deriv)``.
        subject_file_lst (list): Filenames of the subjects to convert. If no row of ``bids_df.df`` matches
            them as filenames, they are interpreted as participant ids instead.
        target_suffix (list): Suffixes of the targets; a derivative is a target when a suffix is a substring
            of its path.
        contrast_lst (list): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        path_hdf5 (str): Path of the HDF5 file; it is opened in write mode.
        contrast_balance (dict): Mapping contrast -> maximal fraction of that contrast's subjects to go
            through selection. ``None`` is treated as an empty mapping (no balancing).
        slice_axis (int): Stored on the instance, forwarded to the bounding-box loader and written to the
            HDF5 attributes.
        metadata_choice (str or bool): ``'mri_params'`` filters subjects with ``imed_film.check_isMRIparam``.
            Also written to the HDF5 attributes.
        slice_filter_fn: Stored on the instance. NOTE: the ``slice_filter_fn`` HDF5 attribute is written with
            a fixed value, independent of this argument.
        roi_params (dict): ROI settings, of which ``"suffix"`` is read here. ``None`` is treated as "no ROI".
        transform: Pair unpacked into ``self.prepro_transforms`` and ``self.transform``. Despite the ``None``
            default, a 2-item iterable is required because the value is unpacked.
        object_detection_params (dict): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        soft_gt (bool): Stored on the instance.
    """
    print("Starting conversion")

    # Both defaults used to crash (``None.keys()`` / ``None["suffix"]``); fall back to neutral values.
    if contrast_balance is None:
        contrast_balance = {}
    if roi_params is None:
        roi_params = {"suffix": None, "slice_filter_roi": None}
    roi_suffix = roi_params["suffix"]

    # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst
    subject_file_lst = sorted(subject_file_lst)
    df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)]
    # Backward compatibility for subject_file_lst containing participant_ids instead of filenames
    if df_subjects.empty:
        df_subjects = bids_df.df[bids_df.df['participant_id'].isin(subject_file_lst)]
        subject_file_lst = sorted(df_subjects['filename'].to_list())

    self.soft_gt = soft_gt
    self.dt = h5py.special_dtype(vlen=str)
    self.path_hdf5 = path_hdf5
    list_patients = []

    self.filename_pairs = []

    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                         "EchoTime": [], "Manufacturer": []}

    self.prepro_transforms, self.transform = transform

    # Number of selected subjects for each contrast of contrast_balance. Summing the boolean match mask gives 0
    # for a contrast without any subject, where ``value_counts()[True]`` raised a KeyError.
    tot = {contrast: df_subjects['suffix'].str.fullmatch(contrast, na=False).sum()
           for contrast in contrast_balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in contrast_balance}

    # Get all subjects path from bids_df for bounding box
    get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
        '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

    # Load bounding box from list of path
    self.has_bounding_box = True
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            get_all_subj_path,
                                                            slice_axis,
                                                            contrast_lst)

    # Get all derivatives filenames from bids_df
    all_deriv = bids_df.get_deriv_fnames()

    for subject in tqdm(subject_file_lst, desc="Loading dataset"):
        df_sub = df_subjects.loc[df_subjects['filename'] == subject]

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        contrast = df_sub['suffix'].values[0]
        if contrast in contrast_balance:
            c[contrast] = c[contrast] + 1
            if c[contrast] / tot[contrast] > contrast_balance[contrast]:
                continue

        target_filename, roi_filename = [None] * len(target_suffix), None
        derivatives = bids_df.df[bids_df.df['filename'].str.contains(
            '|'.join(bids_df.get_derivatives(subject, all_deriv)))]['path'].to_list()

        for deriv in derivatives:
            for idx, suffix in enumerate(target_suffix):
                if suffix in deriv:
                    target_filename[idx] = deriv
            if roi_suffix is not None and roi_suffix in deriv:
                roi_filename = [deriv]

        # Skip subjects without any target, or without ROI when one is requested
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        metadata = df_sub.to_dict(orient='records')[0]
        metadata['contrast'] = contrast

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0]

        if metadata_choice == 'mri_params':
            # A list (not a generator) is built on purpose so that check_isMRIparam is called for every key.
            if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata)
                        for m in self.metadata.keys()]):
                continue

        # Get subj_id (prefix filename without modality suffix and extension)
        subj_id = subject.split('.')[0].split('_')[0]

        self.filename_pairs.append((subj_id, [df_sub['path'].values[0]],
                                    target_filename, roi_filename, [metadata]))
        list_patients.append(subj_id)

    self.slice_axis = slice_axis
    self.slice_filter_fn = slice_filter_fn

    # Update HDF5 metadata
    with h5py.File(self.path_hdf5, "w") as hdf5_file:
        hdf5_file.attrs.create('patients_id', list(set(list_patients)), dtype=self.dt)
        hdf5_file.attrs['slice_axis'] = slice_axis
        hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True), ('filter_empty_mask', False)]
        hdf5_file.attrs['metadata_choice'] = metadata_choice

    # Save images into HDF5 file
    self._load_filenames()
    print("Files loaded.")
def __init__(self, root_dir, subject_lst, target_suffix, contrast_lst, hdf5_name, contrast_balance=None,
             slice_axis=2, metadata_choice=False, slice_filter_fn=None, roi_params=None, transform=None,
             object_detection_params=None, soft_gt=False):
    """Select the subjects of a BIDS folder to convert and write them, with their metadata, to an HDF5 file.

    Each retained subject contributes a tuple
    ``(subject_id, [image path], target_filename, roi_filename, [metadata])`` to ``self.filename_pairs``.

    Args:
        root_dir (str): Path handed to ``bids.BIDS``.
        subject_lst (list): Subject ids to keep; a subject is kept when ``record["subject_id"]`` is in it.
        target_suffix (list): Suffixes of the target derivatives. A derivative is a target when its name ends
            with ``<modality><suffix>.nii.gz``.
        contrast_lst (list): Contrasts to load; subjects of any other modality are ignored.
        hdf5_name (str): Path of the HDF5 file. It is opened in write mode and kept open on
            ``self.hdf5_file``; this constructor does not close it.
        contrast_balance (dict): Mapping contrast -> maximal fraction of that contrast's subjects to go
            through selection. ``None`` is treated as an empty mapping (no balancing).
        slice_axis (int): Stored on the instance, forwarded to the bounding-box loader and written to the
            HDF5 attributes.
        metadata_choice (str or bool): ``'mri_params'`` filters subjects with ``imed_film.check_isMRIparam``.
            Also written to the HDF5 attributes.
        slice_filter_fn: Stored on the instance. NOTE: the ``slice_filter_fn`` HDF5 attribute is written with
            a fixed value, independent of this argument.
        roi_params (dict): ROI settings, of which ``"suffix"`` is read here. ``None`` is treated as "no ROI".
        transform: Pair unpacked into ``self.prepro_transforms`` and ``self.transform``. Despite the ``None``
            default, a 2-item iterable is required because the value is unpacked.
        object_detection_params (dict): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        soft_gt (bool): Stored on the instance.
    """
    print("Starting conversion")

    # Both defaults used to crash (``None.keys()`` / ``None["suffix"]``); fall back to neutral values.
    if contrast_balance is None:
        contrast_balance = {}
    if roi_params is None:
        roi_params = {"suffix": None, "slice_filter_roi": None}
    roi_suffix = roi_params["suffix"]

    # Getting all patients id
    self.bids_ds = bids.BIDS(root_dir)
    bids_subjects = [s for s in self.bids_ds.get_subjects() if s.record["subject_id"] in subject_lst]

    self.soft_gt = soft_gt
    self.dt = h5py.special_dtype(vlen=str)
    # opening an hdf5 file with write access and writing metadata
    self.hdf5_file = h5py.File(hdf5_name, "w")

    list_patients = []

    self.filename_pairs = []

    if metadata_choice == 'mri_params':
        self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                         "EchoTime": [], "Manufacturer": []}

    self.prepro_transforms, self.transform = transform

    # Create a dictionary with the number of subjects for each contrast of contrast_balance
    tot = {contrast: len([s for s in bids_subjects if s.record["modality"] == contrast])
           for contrast in contrast_balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in contrast_balance}

    self.has_bounding_box = True
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            self.bids_ds.get_subjects(),
                                                            slice_axis,
                                                            contrast_lst)

    for subject in tqdm(bids_subjects, desc="Loading dataset"):
        modality = subject.record["modality"]
        if modality not in contrast_lst:
            continue

        # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
        if modality in contrast_balance:
            c[modality] = c[modality] + 1
            if c[modality] / tot[modality] > contrast_balance[modality]:
                continue

        if not subject.has_derivative("labels"):
            print("Subject without derivative, skipping.")
            continue
        derivatives = subject.get_derivatives("labels")
        target_filename, roi_filename = [None] * len(target_suffix), None

        for deriv in derivatives:
            for idx, suffix in enumerate(target_suffix):
                if deriv.endswith(modality + suffix + ".nii.gz"):
                    target_filename[idx] = deriv

            if roi_suffix is not None and deriv.endswith(modality + roi_suffix + ".nii.gz"):
                roi_filename = [deriv]

        # Skip subjects without any target, or without ROI when one is requested
        if not any(target_filename) or (roi_suffix is not None and roi_filename is None):
            continue

        if not subject.has_metadata():
            print("Subject without metadata.")
            metadata = {}
        else:
            metadata = subject.metadata()

        # add contrast to metadata
        metadata['contrast'] = modality

        if metadata_choice == 'mri_params':
            # A list (not a generator) is built on purpose so that check_isMRIparam is called for every key.
            if not all([imed_film.check_isMRIparam(m, metadata) for m in self.metadata.keys()]):
                continue

        if len(bounding_box_dict):
            # Take only one bounding box for cropping
            metadata['bounding_box'] = bounding_box_dict[str(subject.record["absolute_path"])][0]

        self.filename_pairs.append((subject.record["subject_id"], [subject.record.absolute_path],
                                    target_filename, roi_filename, [metadata]))

        list_patients.append(subject.record["subject_id"])

    self.slice_axis = slice_axis
    self.slice_filter_fn = slice_filter_fn

    # Update HDF5 metadata
    self.hdf5_file.attrs.create('patients_id', list(set(list_patients)), dtype=self.dt)
    self.hdf5_file.attrs['slice_axis'] = slice_axis

    self.hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True), ('filter_empty_mask', False)]
    self.hdf5_file.attrs['metadata_choice'] = metadata_choice

    # Save images into HDF5 file
    self._load_filenames()
    print("Files loaded.")
def __init__(self, bids_df, subject_file_lst, target_suffix, contrast_params, model_params, slice_axis=2,
             nibabel_cache=True, transform=None, metadata_choice=False, slice_filter_fn=None,
             patch_filter_fn=None, roi_params=None, multichannel=False, object_detection_params=None,
             task="segmentation", soft_gt=False, is_input_dropout=False):
    """Build the list of (inputs, targets, ROI, metadata) tuples from a BIDS dataframe and initialise the parent.

    Args:
        bids_df: Object exposing a ``df`` dataframe (the ``filename``, ``suffix`` and ``path`` columns are
            read here) as well as ``get_subject_fnames()`` and ``get_deriv_fnames()``.
        subject_file_lst (list): Filenames of the subjects to load.
        target_suffix (list): Forwarded to ``self.create_filename_pair``.
        contrast_params (dict): Must provide ``ContrastParamsKW.CONTRAST_LST`` and
            ``ContrastParamsKW.BALANCE`` (a mapping keyed by contrast).
        model_params (dict): ``ModelParamsKW.LENGTH_2D`` and ``ModelParamsKW.STRIDE_2D`` are read when
            present; each falls back to an empty list.
        slice_axis (int): Forwarded to the bounding-box loader and to the parent constructor.
        nibabel_cache (bool): Forwarded to the parent constructor.
        transform: Forwarded to the parent constructor.
        metadata_choice (str or bool): When equal to ``MetadataKW.MRI_PARAMS``, ``self.metadata`` is
            initialised with empty lists for the MRI parameters. Forwarded to ``self.create_filename_pair``.
        slice_filter_fn: Forwarded to the parent constructor.
        patch_filter_fn: Forwarded to the parent constructor.
        roi_params (dict): ROI settings; ``None`` is replaced by a dict with ``ROIParamsKW.SUFFIX`` and
            ``ROIParamsKW.SLICE_FILTER_ROI`` set to ``None``.
        multichannel (bool): If True, the contrasts of a subject are grouped into a single tuple, and only
            subjects whose ``"absolute_paths"`` list contains no ``None`` are kept.
        object_detection_params (dict): Forwarded to ``imed_obj_detect.load_bounding_boxes``.
        task (str): Forwarded to the parent constructor.
        soft_gt (bool): Stored on the instance and forwarded to the parent constructor.
        is_input_dropout (bool): Forwarded to the parent constructor.

    Raises:
        Exception: If no subject was selected.
    """
    self.roi_params = roi_params if roi_params is not None else \
        {ROIParamsKW.SUFFIX: None, ROIParamsKW.SLICE_FILTER_ROI: None}
    self.soft_gt = soft_gt
    self.filename_pairs = []
    if metadata_choice == MetadataKW.MRI_PARAMS:
        self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                         "EchoTime": [], "Manufacturer": []}

    contrast_lst = contrast_params[ContrastParamsKW.CONTRAST_LST]
    balance = contrast_params[ContrastParamsKW.BALANCE]

    # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst
    subject_file_lst = sorted(subject_file_lst)
    df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)]

    # Number of selected subjects for each contrast of contrast_balance. Summing the boolean match mask gives 0
    # for a contrast without any subject, where ``value_counts()[True]`` raised a KeyError.
    tot = {contrast: df_subjects['suffix'].str.fullmatch(contrast, na=False).sum()
           for contrast in balance}

    # Create a counter that helps to balance the contrasts
    c = {contrast: 0 for contrast in balance}

    # Get a list of subject_ids for multichannel_subjects (prefix filename without modality suffix and extension)
    subject_ids = sorted({subject.split('.')[0].split('_')[0] for subject in subject_file_lst})

    # Create multichannel_subjects dictionary for each subject_id
    multichannel_subjects = {}
    idx_dict = {}
    if multichannel:
        num_contrast = len(contrast_lst)
        # Channel position of each contrast
        idx_dict = {contrast: idx for idx, contrast in enumerate(contrast_lst)}
        multichannel_subjects = {subject: {"absolute_paths": [None] * num_contrast,
                                           "deriv_path": None,
                                           "roi_filename": None,
                                           SubjectDictKW.METADATA: [None] * num_contrast}
                                 for subject in subject_ids}

    # Get all subjects path from bids_df for bounding box
    get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
        '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

    # Load bounding box from list of path
    bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                            get_all_subj_path,
                                                            slice_axis,
                                                            contrast_lst)

    # Get all derivatives filenames from bids_df
    all_deriv = bids_df.get_deriv_fnames()

    # Create filename_pairs
    for subject in tqdm(subject_file_lst, desc="Loading dataset"):
        df_sub, roi_filename, target_filename, metadata = self.create_filename_pair(
            multichannel_subjects, subject, c, tot, multichannel, df_subjects, contrast_params, target_suffix,
            all_deriv, bids_df, bounding_box_dict, idx_dict, metadata_choice)

        # Fill multichannel dictionary
        # subj_id is the filename without modality suffix and extension
        if multichannel:
            multichannel_subjects = self.fill_multichannel_dict(
                multichannel_subjects, subject, idx_dict, df_sub, roi_filename, target_filename, metadata)
        else:
            self.filename_pairs.append(([df_sub['path'].values[0]],
                                        target_filename, roi_filename, [metadata]))

    if multichannel:
        # Keep only the subjects for which every channel slot was filled
        for subject in multichannel_subjects.values():
            if None not in subject["absolute_paths"]:
                self.filename_pairs.append((subject["absolute_paths"], subject["deriv_path"],
                                            subject["roi_filename"], subject[SubjectDictKW.METADATA]))

    if not self.filename_pairs:
        raise Exception('No subjects were selected - check selection of parameters on config.json (e.g. center '
                        'selected + target_suffix)')

    length = model_params[ModelParamsKW.LENGTH_2D] if ModelParamsKW.LENGTH_2D in model_params else []
    stride = model_params[ModelParamsKW.STRIDE_2D] if ModelParamsKW.STRIDE_2D in model_params else []

    super().__init__(self.filename_pairs, length, stride, slice_axis, nibabel_cache, transform, slice_filter_fn,
                     patch_filter_fn, task, self.roi_params, self.soft_gt, is_input_dropout)