Example 1
    def __init__(self,
                 bids_df,
                 subject_file_lst,
                 target_suffix,
                 contrast_lst,
                 path_hdf5,
                 contrast_balance=None,
                 slice_axis=2,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 roi_params=None,
                 transform=None,
                 object_detection_params=None,
                 soft_gt=False):
        print("Starting conversion")

        # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst
        subject_file_lst = sorted(subject_file_lst)
        df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)]
        # Backward compatibility for subject_file_lst containing participant_ids instead of filenames
        if df_subjects.empty:
            df_subjects = bids_df.df[bids_df.df['participant_id'].isin(
                subject_file_lst)]
            subject_file_lst = sorted(df_subjects['filename'].to_list())

        self.soft_gt = soft_gt
        self.dt = h5py.special_dtype(vlen=str)
        # Path of the HDF5 file; it is opened with write access further below
        self.path_hdf5 = path_hdf5
        list_patients = []

        self.filename_pairs = []
        self.metadata = {}

        if metadata_choice == 'mri_params':
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        self.prepro_transforms, self.transform = transform

        # Guard against the default value: treat a missing contrast_balance as an empty dict
        contrast_balance = contrast_balance if contrast_balance is not None else {}

        # Create a dictionary with the number of subjects for each contrast of contrast_balance
        tot = {
            contrast:
            df_subjects['suffix'].str.fullmatch(contrast).value_counts()[True]
            for contrast in contrast_balance.keys()
        }

        # Create a counter that helps to balance the contrasts
        c = {contrast: 0 for contrast in contrast_balance.keys()}

        # Get the paths of all subjects from bids_df (used for bounding boxes)
        get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
            '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

        # Load bounding boxes from the list of paths
        self.has_bounding_box = True
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, get_all_subj_path, slice_axis,
            contrast_lst)

        # Get all derivatives filenames from bids_df
        all_deriv = bids_df.get_deriv_fnames()

        for subject in tqdm(subject_file_lst, desc="Loading dataset"):
            self.process_subject(bids_df, subject, df_subjects, c, tot,
                                 contrast_balance, target_suffix, all_deriv,
                                 roi_params, bounding_box_dict,
                                 metadata_choice, list_patients)

        self.slice_axis = slice_axis
        self.slice_filter_fn = slice_filter_fn

        # Update HDF5 metadata
        with h5py.File(self.path_hdf5, "w") as hdf5_file:
            hdf5_file.attrs.create('patients_id',
                                   list(set(list_patients)),
                                   dtype=self.dt)
            hdf5_file.attrs['slice_axis'] = slice_axis

            hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True),
                                                  ('filter_empty_mask', False)]
            hdf5_file.attrs['metadata_choice'] = metadata_choice

        # Save images into HDF5 file
        self._load_filenames()
        print("Files loaded.")
Example 2
    def __init__(self,
                 root_dir,
                 subject_lst,
                 target_suffix,
                 contrast_params,
                 slice_axis=2,
                 cache=True,
                 transform=None,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 roi_params=None,
                 multichannel=False,
                 object_detection_params=None,
                 task="segmentation",
                 soft_gt=False):

        self.bids_ds = bids.BIDS(root_dir)
        self.roi_params = roi_params if roi_params is not None else {
            "suffix": None,
            "slice_filter_roi": None
        }
        self.soft_gt = soft_gt
        self.filename_pairs = []
        if metadata_choice == 'mri_params':
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        bids_subjects = [
            s for s in self.bids_ds.get_subjects()
            if s.record["subject_id"] in subject_lst
        ]

        # Create a list with the filenames for all contrasts and subjects
        subjects_tot = []
        for subject in bids_subjects:
            subjects_tot.append(str(subject.record["absolute_path"]))

        # Create a dictionary with the number of subjects for each contrast of contrast_balance

        tot = {
            contrast:
            len([s for s in bids_subjects if s.record["modality"] == contrast])
            for contrast in contrast_params["balance"].keys()
        }

        # Create a counter that helps to balance the contrasts
        c = {contrast: 0 for contrast in contrast_params["balance"].keys()}

        multichannel_subjects = {}
        if multichannel:
            num_contrast = len(contrast_params["contrast_lst"])
            idx_dict = {}
            for idx, contrast in enumerate(contrast_params["contrast_lst"]):
                idx_dict[contrast] = idx
            multichannel_subjects = {
                subject: {
                    "absolute_paths": [None] * num_contrast,
                    "deriv_path": None,
                    "roi_filename": None,
                    "metadata": [None] * num_contrast
                }
                for subject in subject_lst
            }

        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, self.bids_ds.get_subjects(), slice_axis,
            contrast_params["contrast_lst"])

        for subject in tqdm(bids_subjects, desc="Loading dataset"):
            if subject.record["modality"] in contrast_params["contrast_lst"]:
                # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
                if subject.record["modality"] in contrast_params[
                        "balance"].keys():
                    c[subject.
                      record["modality"]] = c[subject.record["modality"]] + 1
                    if c[subject.record["modality"]] / tot[subject.record[
                            "modality"]] > contrast_params["balance"][
                                subject.record["modality"]]:
                        continue

                if not subject.has_derivative("labels"):
                    print("Subject without derivative, skipping.")
                    continue
                derivatives = subject.get_derivatives("labels")
                target_filename, roi_filename = [None] * len(target_suffix), None

                for deriv in derivatives:
                    for idx, suffix in enumerate(target_suffix):
                        if deriv.endswith(subject.record["modality"] + suffix +
                                          ".nii.gz"):
                            target_filename[idx] = deriv

                    if self.roi_params["suffix"] is not None and \
                            deriv.endswith(subject.record["modality"] + self.roi_params["suffix"] + ".nii.gz"):
                        roi_filename = [deriv]

                # Skip subjects with no matching target label, or with a missing ROI when an ROI suffix is requested
                if not any(target_filename) or \
                        (self.roi_params["suffix"] is not None and roi_filename is None):
                    continue

                if not subject.has_metadata():
                    metadata = {}
                else:
                    metadata = subject.metadata()

                # add contrast to metadata
                metadata['contrast'] = subject.record["modality"]

                if len(bounding_box_dict):
                    # Take only one bounding box for cropping
                    metadata['bounding_box'] = bounding_box_dict[str(
                        subject.record["absolute_path"])][0]

                if metadata_choice == 'mri_params':
                    if not all([
                            imed_film.check_isMRIparam(m, metadata, subject,
                                                       self.metadata)
                            for m in self.metadata.keys()
                    ]):
                        continue

                elif metadata_choice and metadata_choice != 'contrasts':
                    # add custom data to metadata
                    subject_id = subject.record["subject_id"]
                    df = bids.BIDS(root_dir).participants.content
                    if metadata_choice not in df.columns:
                        raise ValueError(
                            "The following metadata cannot be found in participants.tsv file: {}. "
                            "Invalid metadata choice.".format(metadata_choice))

                    metadata[metadata_choice] = \
                        df[df['participant_id'] == subject_id][metadata_choice].values[0]

                    # Create metadata dict for OHE
                    data_lst = sorted(set(df[metadata_choice].values))
                    metadata_dict = {}
                    for idx, data in enumerate(data_lst):
                        metadata_dict[data] = idx

                    metadata['metadata_dict'] = metadata_dict

                # Fill multichannel dictionary
                if multichannel:
                    idx = idx_dict[subject.record["modality"]]
                    subj_id = subject.record["subject_id"]
                    multichannel_subjects[subj_id]["absolute_paths"][idx] = subject.record.absolute_path
                    multichannel_subjects[subj_id]["deriv_path"] = target_filename
                    multichannel_subjects[subj_id]["metadata"][idx] = metadata
                    if roi_filename:
                        multichannel_subjects[subj_id]["roi_filename"] = roi_filename

                else:
                    self.filename_pairs.append(
                        ([subject.record.absolute_path], target_filename,
                         roi_filename, [metadata]))

        if multichannel:
            for subject in multichannel_subjects.values():
                if None not in subject["absolute_paths"]:
                    self.filename_pairs.append(
                        (subject["absolute_paths"], subject["deriv_path"],
                         subject["roi_filename"], subject["metadata"]))

        super().__init__(self.filename_pairs, slice_axis, cache, transform,
                         slice_filter_fn, task, self.roi_params, self.soft_gt)
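
The multichannel branch in the example above groups each subject's files into fixed-length slots, one per contrast, and later keeps only subjects whose every slot was filled. Here is a stripped-down sketch of that grouping idea with plain tuples instead of BIDS subject records; all subject IDs, contrasts, and paths are invented for illustration:

# (subject_id, contrast, image_path): hypothetical inputs
records = [
    ("sub-01", "T1w", "/data/sub-01_T1w.nii.gz"),
    ("sub-01", "T2w", "/data/sub-01_T2w.nii.gz"),
    ("sub-02", "T1w", "/data/sub-02_T1w.nii.gz"),  # sub-02 is missing T2w
]
contrast_lst = ["T1w", "T2w"]
idx_dict = {contrast: idx for idx, contrast in enumerate(contrast_lst)}

# One fixed-length slot list per subject, mirroring multichannel_subjects
multichannel_subjects = {
    subject_id: {"absolute_paths": [None] * len(contrast_lst)}
    for subject_id in {subject_id for subject_id, _, _ in records}
}
for subject_id, contrast, path in records:
    multichannel_subjects[subject_id]["absolute_paths"][idx_dict[contrast]] = path

# Keep only subjects whose every contrast slot is filled (sub-01 here)
complete = {
    subject_id: entry
    for subject_id, entry in multichannel_subjects.items()
    if None not in entry["absolute_paths"]
}
print(sorted(complete))  # ['sub-01']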
Example 3
    def __init__(self,
                 bids_df,
                 subject_file_lst,
                 target_suffix,
                 contrast_lst,
                 path_hdf5,
                 contrast_balance=None,
                 slice_axis=2,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 roi_params=None,
                 transform=None,
                 object_detection_params=None,
                 soft_gt=False):
        print("Starting conversion")

        # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst
        subject_file_lst = sorted(subject_file_lst)
        df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)]
        # Backward compatibility for subject_file_lst containing participant_ids instead of filenames
        if df_subjects.empty:
            df_subjects = bids_df.df[bids_df.df['participant_id'].isin(
                subject_file_lst)]
            subject_file_lst = sorted(df_subjects['filename'].to_list())

        self.soft_gt = soft_gt
        self.dt = h5py.special_dtype(vlen=str)
        # Path of the HDF5 file; it is opened with write access further below
        self.path_hdf5 = path_hdf5
        list_patients = []

        self.filename_pairs = []

        if metadata_choice == 'mri_params':
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        self.prepro_transforms, self.transform = transform

        # Guard against the default values: treat missing optional dicts as empty / disabled
        contrast_balance = contrast_balance if contrast_balance is not None else {}
        roi_params = roi_params if roi_params is not None else \
            {"suffix": None, "slice_filter_roi": None}

        # Create a dictionary with the number of subjects for each contrast of contrast_balance
        tot = {
            contrast:
            df_subjects['suffix'].str.fullmatch(contrast).value_counts()[True]
            for contrast in contrast_balance.keys()
        }

        # Create a counter that helps to balance the contrasts
        c = {contrast: 0 for contrast in contrast_balance.keys()}

        # Get the paths of all subjects from bids_df (used for bounding boxes)
        get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
            '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

        # Load bounding boxes from the list of paths
        self.has_bounding_box = True
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, get_all_subj_path, slice_axis,
            contrast_lst)

        # Get all derivatives filenames from bids_df
        all_deriv = bids_df.get_deriv_fnames()

        for subject in tqdm(subject_file_lst, desc="Loading dataset"):

            df_sub = df_subjects.loc[df_subjects['filename'] == subject]

            # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
            contrast = df_sub['suffix'].values[0]
            if contrast in (contrast_balance.keys()):
                c[contrast] = c[contrast] + 1
                if c[contrast] / tot[contrast] > contrast_balance[contrast]:
                    continue

            target_filename, roi_filename = [None] * len(target_suffix), None

            derivatives = bids_df.df[bids_df.df['filename'].str.contains(
                '|'.join(bids_df.get_derivatives(
                    subject, all_deriv)))]['path'].to_list()

            for deriv in derivatives:
                for idx, suffix in enumerate(target_suffix):
                    if suffix in deriv:
                        target_filename[idx] = deriv
                if roi_params["suffix"] is not None and roi_params["suffix"] in deriv:
                    roi_filename = [deriv]

            # Skip subjects with no matching target label, or with a missing ROI when an ROI suffix is requested
            if not any(target_filename) or \
                    (roi_params["suffix"] is not None and roi_filename is None):
                continue

            metadata = df_sub.to_dict(orient='records')[0]
            metadata['contrast'] = contrast

            if len(bounding_box_dict):
                # Take only one bounding box for cropping
                metadata['bounding_box'] = bounding_box_dict[str(
                    df_sub['path'].values[0])][0]

            if metadata_choice == 'mri_params':
                if not all([
                        imed_film.check_isMRIparam(m, metadata, subject,
                                                   self.metadata)
                        for m in self.metadata.keys()
                ]):
                    continue

            # Get subj_id (prefix filename without modality suffix and extension)
            subj_id = subject.split('.')[0].split('_')[0]

            self.filename_pairs.append(
                (subj_id, [df_sub['path'].values[0]], target_filename,
                 roi_filename, [metadata]))
            list_patients.append(subj_id)

        self.slice_axis = slice_axis
        self.slice_filter_fn = slice_filter_fn

        # Update HDF5 metadata
        with h5py.File(self.path_hdf5, "w") as hdf5_file:
            hdf5_file.attrs.create('patients_id',
                                   list(set(list_patients)),
                                   dtype=self.dt)
            hdf5_file.attrs['slice_axis'] = slice_axis

            hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True),
                                                  ('filter_empty_mask', False)]
            hdf5_file.attrs['metadata_choice'] = metadata_choice

        # Save images into HDF5 file
        self._load_filenames()
        print("Files loaded.")
Example 4
    def __init__(self,
                 root_dir,
                 subject_lst,
                 target_suffix,
                 contrast_lst,
                 hdf5_name,
                 contrast_balance=None,
                 slice_axis=2,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 roi_params=None,
                 transform=None,
                 object_detection_params=None,
                 soft_gt=False):
        print("Starting conversion")
        # Getting all patients id
        self.bids_ds = bids.BIDS(root_dir)
        bids_subjects = [
            s for s in self.bids_ds.get_subjects()
            if s.record["subject_id"] in subject_lst
        ]
        self.soft_gt = soft_gt
        self.dt = h5py.special_dtype(vlen=str)
        # opening an hdf5 file with write access and writing metadata
        self.hdf5_file = h5py.File(hdf5_name, "w")

        list_patients = []

        self.filename_pairs = []

        if metadata_choice == 'mri_params':
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        self.prepro_transforms, self.transform = transform
        # Create a list with the filenames for all contrasts and subjects
        subjects_tot = []
        for subject in bids_subjects:
            subjects_tot.append(str(subject.record["absolute_path"]))

        # Guard against the default values: treat missing optional dicts as empty / disabled
        contrast_balance = contrast_balance if contrast_balance is not None else {}
        roi_params = roi_params if roi_params is not None else \
            {"suffix": None, "slice_filter_roi": None}

        # Create a dictionary with the number of subjects for each contrast of contrast_balance
        tot = {
            contrast:
            len([s for s in bids_subjects if s.record["modality"] == contrast])
            for contrast in contrast_balance.keys()
        }

        # Create a counter that helps to balance the contrasts
        c = {contrast: 0 for contrast in contrast_balance.keys()}

        self.has_bounding_box = True
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, self.bids_ds.get_subjects(), slice_axis,
            contrast_lst)

        for subject in tqdm(bids_subjects, desc="Loading dataset"):

            if subject.record["modality"] in contrast_lst:

                # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
                if subject.record["modality"] in contrast_balance.keys():
                    c[subject.
                      record["modality"]] = c[subject.record["modality"]] + 1
                    if c[subject.record["modality"]] / tot[subject.record["modality"]] \
                            > contrast_balance[subject.record["modality"]]:
                        continue

                if not subject.has_derivative("labels"):
                    print("Subject without derivative, skipping.")
                    continue
                derivatives = subject.get_derivatives("labels")

                target_filename, roi_filename = [None] * len(target_suffix), None

                for deriv in derivatives:
                    for idx, suffix in enumerate(target_suffix):
                        if deriv.endswith(subject.record["modality"] + suffix +
                                          ".nii.gz"):
                            target_filename[idx] = deriv

                    if roi_params["suffix"] is not None and \
                            deriv.endswith(subject.record["modality"] + roi_params["suffix"] + ".nii.gz"):
                        roi_filename = [deriv]

                # Skip subjects with no matching target label, or with a missing ROI when an ROI suffix is requested
                if not any(target_filename) or \
                        (roi_params["suffix"] is not None and roi_filename is None):
                    continue

                if not subject.has_metadata():
                    print("Subject without metadata.")
                    metadata = {}
                else:
                    metadata = subject.metadata()
                # add contrast to metadata
                metadata['contrast'] = subject.record["modality"]

                if metadata_choice == 'mri_params':
                    if not all([
                            imed_film.check_isMRIparam(m, metadata)
                            for m in self.metadata.keys()
                    ]):
                        continue

                if len(bounding_box_dict):
                    # Take only one bounding box for cropping
                    metadata['bounding_box'] = bounding_box_dict[str(
                        subject.record["absolute_path"])][0]

                self.filename_pairs.append(
                    (subject.record["subject_id"], [subject.record.absolute_path],
                     target_filename, roi_filename, [metadata]))

                list_patients.append(subject.record["subject_id"])

        self.slice_axis = slice_axis
        self.slice_filter_fn = slice_filter_fn

        # Update HDF5 metadata
        self.hdf5_file.attrs.create('patients_id',
                                    list(set(list_patients)),
                                    dtype=self.dt)
        self.hdf5_file.attrs['slice_axis'] = slice_axis

        self.hdf5_file.attrs['slice_filter_fn'] = [
            ('filter_empty_input', True), ('filter_empty_mask', False)
        ]
        self.hdf5_file.attrs['metadata_choice'] = metadata_choice

        # Save images into HDF5 file
        self._load_filenames()
        print("Files loaded.")
Example 5
    def __init__(self,
                 bids_df,
                 subject_file_lst,
                 target_suffix,
                 contrast_params,
                 model_params,
                 slice_axis=2,
                 nibabel_cache=True,
                 transform=None,
                 metadata_choice=False,
                 slice_filter_fn=None,
                 patch_filter_fn=None,
                 roi_params=None,
                 multichannel=False,
                 object_detection_params=None,
                 task="segmentation",
                 soft_gt=False,
                 is_input_dropout=False):

        self.roi_params = roi_params if roi_params is not None else \
            {ROIParamsKW.SUFFIX: None, ROIParamsKW.SLICE_FILTER_ROI: None}
        self.soft_gt = soft_gt
        self.filename_pairs = []
        if metadata_choice == MetadataKW.MRI_PARAMS:
            self.metadata = {
                "FlipAngle": [],
                "RepetitionTime": [],
                "EchoTime": [],
                "Manufacturer": []
            }

        # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst
        subject_file_lst = sorted(subject_file_lst)
        df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)]

        # Create a dictionary with the number of subjects for each contrast of contrast_balance
        tot = {
            contrast:
            df_subjects['suffix'].str.fullmatch(contrast).value_counts()[True]
            for contrast in contrast_params[ContrastParamsKW.BALANCE].keys()
        }

        # Create a counter that helps to balance the contrasts
        c = {
            contrast: 0
            for contrast in contrast_params[ContrastParamsKW.BALANCE].keys()
        }

        # Get a list of subject_ids for multichannel_subjects (prefix filename without modality suffix and extension)
        subject_ids = []
        for subject in subject_file_lst:
            subject_ids.append(subject.split('.')[0].split('_')[0])
        subject_ids = sorted(list(set(subject_ids)))

        # Create multichannel_subjects dictionary for each subject_id
        multichannel_subjects = {}
        idx_dict = {}
        if multichannel:
            num_contrast = len(contrast_params[ContrastParamsKW.CONTRAST_LST])
            for idx, contrast in enumerate(
                    contrast_params[ContrastParamsKW.CONTRAST_LST]):
                idx_dict[contrast] = idx
            multichannel_subjects = {
                subject: {
                    "absolute_paths": [None] * num_contrast,
                    "deriv_path": None,
                    "roi_filename": None,
                    SubjectDictKW.METADATA: [None] * num_contrast
                }
                for subject in subject_ids
            }

        # Get the paths of all subjects from bids_df (used for bounding boxes)
        get_all_subj_path = bids_df.df[bids_df.df['filename'].str.contains(
            '|'.join(bids_df.get_subject_fnames()))]['path'].to_list()

        # Load bounding boxes from the list of paths
        bounding_box_dict = imed_obj_detect.load_bounding_boxes(
            object_detection_params, get_all_subj_path, slice_axis,
            contrast_params[ContrastParamsKW.CONTRAST_LST])

        # Get all derivatives filenames from bids_df
        all_deriv = bids_df.get_deriv_fnames()

        # Create filename_pairs
        for subject in tqdm(subject_file_lst, desc="Loading dataset"):
            df_sub, roi_filename, target_filename, metadata = self.create_filename_pair(
                multichannel_subjects, subject, c, tot, multichannel,
                df_subjects, contrast_params, target_suffix, all_deriv,
                bids_df, bounding_box_dict, idx_dict, metadata_choice)
            # Fill multichannel dictionary
            # subj_id is the filename without modality suffix and extension
            if multichannel:
                multichannel_subjects = self.fill_multichannel_dict(
                    multichannel_subjects, subject, idx_dict, df_sub,
                    roi_filename, target_filename, metadata)
            else:
                self.filename_pairs.append(
                    ([df_sub['path'].values[0]], target_filename, roi_filename,
                     [metadata]))

        if multichannel:
            for subject in multichannel_subjects.values():
                if None not in subject["absolute_paths"]:
                    self.filename_pairs.append(
                        (subject["absolute_paths"], subject["deriv_path"],
                         subject["roi_filename"],
                         subject[SubjectDictKW.METADATA]))

        if not self.filename_pairs:
            raise Exception(
                'No subjects were selected - check selection of parameters on config.json (e.g. center '
                'selected + target_suffix)')

        length = model_params[ModelParamsKW.LENGTH_2D] \
            if ModelParamsKW.LENGTH_2D in model_params else []
        stride = model_params[ModelParamsKW.STRIDE_2D] \
            if ModelParamsKW.STRIDE_2D in model_params else []

        super().__init__(self.filename_pairs, length, stride, slice_axis,
                         nibabel_cache, transform, slice_filter_fn,
                         patch_filter_fn, task, self.roi_params, self.soft_gt,
                         is_input_dropout)
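
The subject IDs used as keys of `multichannel_subjects` in the example above are simply the filename prefix before the first underscore, as computed by `subject.split('.')[0].split('_')[0]`. A small sketch of that derivation with invented BIDS-style filenames:

subject_file_lst = [
    "sub-01_T1w.nii.gz",
    "sub-01_T2w.nii.gz",
    "sub-02_acq-ax_T2w.nii.gz",
]

# Prefix of the filename, without modality suffix and extension, deduplicated and sorted
subject_ids = sorted({filename.split('.')[0].split('_')[0] for filename in subject_file_lst})
print(subject_ids)  # ['sub-01', 'sub-02']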