Example #1
    def _run_interface(self, runtime):
        import re
        import bids
        from bids.analysis import auto_model
        from nipype.interfaces.base import isdefined  # needed for the isdefined() check below
        models = self.inputs.model
        if not isinstance(models, list):
            database_path = self.inputs.database_path
            layout = bids.BIDSLayout.load(database_path=database_path)

            if not isdefined(models):
                # model is not yet standardized, so validate=False
                # Ignore all subject directories and .git/ and .datalad/ directories
                indexer = bids.BIDSLayoutIndexer(
                    ignore=[re.compile(r'sub-'), re.compile(r'\.(git|datalad)')])
                small_layout = bids.BIDSLayout(
                    layout.root, derivatives=[d.root for d in layout.derivatives.values()],
                    validate=False,
                    indexer=indexer)
                # PyBIDS can double up, so find unique models
                models = list(set(small_layout.get(suffix='smdl', return_type='file')))
                if not models:
                    raise ValueError("No models found")
            elif models == 'default':
                models = auto_model(layout)

        models = [_ensure_model(m) for m in models]  # _ensure_model: module-level helper not shown in this snippet

        if self.inputs.selectors:
            # This is almost certainly incorrect
            models = [model for model in models
                      if all(val in model['Input'].get(key, [val])
                             for key, val in self.inputs.selectors.items())]

        self._results['model_spec'] = models

        return runtime
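The core trick above — ignoring all subject directories so only dataset-level files are indexed — works outside nipype too. A minimal standalone sketch of the same pattern, assuming a pybids version that accepts an indexer argument and compiled-regex ignore patterns (as the snippet itself does); the dataset path is hypothetical:

import re
import bids

# Skip every subject directory plus .git/ and .datalad/, so only
# dataset-level files (such as *_smdl.json model specs) are indexed.
indexer = bids.BIDSLayoutIndexer(
    ignore=[re.compile(r'sub-'), re.compile(r'\.(git|datalad)')])
layout = bids.BIDSLayout('/path/to/bids_root',  # hypothetical dataset root
                         validate=False, indexer=indexer)
# PyBIDS can return duplicates, so deduplicate with set()
model_files = sorted(set(layout.get(suffix='smdl', return_type='file')))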
Example #2
def run_fitlins(argv=None):
    # NB: this snippet also relies on module-level imports not shown here
    # (warnings, os, op, sys, time, Path, mkdtemp, deepcopy, cpu_count,
    # bids, fub, config, logger, and several fitlins helpers).
    import re
    from nipype import logging as nlogging

    warnings.showwarning = _warn_redirect
    opts = get_parser().parse_args(argv)

    force_index = [
        # If entry looks like `/<pattern>/`, treat `<pattern>` as a regex
        re.compile(ign[1:-1]) if (ign[0], ign[-1]) == ('/', '/') else ign
        # Iterate over empty tuple if undefined
        for ign in opts.force_index or ()
    ]
    ignore = [
        # If entry looks like `/<pattern>/`, treat `<pattern>` as a regex
        re.compile(ign[1:-1]) if (ign[0], ign[-1]) == ('/', '/') else ign
        # Iterate over empty tuple if undefined
        for ign in opts.ignore or ()
    ]

    log_level = 25 + 5 * (opts.quiet - opts.verbose)
    logger.setLevel(log_level)
    nlogging.getLogger('nipype.workflow').setLevel(log_level)
    nlogging.getLogger('nipype.interface').setLevel(log_level)
    nlogging.getLogger('nipype.utils').setLevel(log_level)

    if not opts.space:
        # make it an explicit None
        opts.space = None
    if not opts.desc_label:
        # make it an explicit None
        opts.desc_label = None

    ncpus = opts.n_cpus
    if ncpus < 1:
        ncpus = cpu_count()

    plugin_settings = {
        'plugin': 'MultiProc',
        'plugin_args': {
            'n_procs': ncpus,
            'raise_insufficient': False,
            'maxtasksperchild': 1,
        },
    }

    if opts.mem_gb:
        plugin_settings['plugin_args']['memory_gb'] = opts.mem_gb

    model = default_path(opts.model, opts.bids_dir, 'model-default_smdl.json')
    if opts.model in (None, 'default') and not op.exists(model):
        model = 'default'

    derivatives = True if not opts.derivatives else opts.derivatives
    # Need this when specifying args directly (i.e. neuroscout)
    # god bless neuroscout, but let's make it work for others!
    if isinstance(derivatives, list) and len(derivatives) == 1:
        # WRONG AND EVIL to those who have spaces in their paths... bad bad practice
        # TODO - fix neuroscout
        derivatives = derivatives[0].split(" ")

    if opts.estimator != 'afni':
        if opts.error_ts:
            raise NotImplementedError(
                "Saving the error time series is only implemented for"
                " the afni estimator. If this is a feature you want"
                f" for {opts.estimator} please let us know on github.")

    if opts.derivative_label:
        logger.warning('--derivative-label no longer has any effect; '
                       'set output directory name directly')
    os.makedirs(opts.output_dir, exist_ok=True)
    fub.write_derivative_description(opts.bids_dir, opts.output_dir,
                                     vars(opts))

    work_dir = mkdtemp() if opts.work_dir is None else opts.work_dir

    # Go ahead and initialize the layout database
    if opts.database_path is None:
        database_path = Path(work_dir) / 'dbcache'
        reset_database = True
    else:
        database_path = opts.database_path
        reset_database = False

    indexer = bids.BIDSLayoutIndexer(ignore=ignore, force_index=force_index)
    layout = bids.BIDSLayout(
        opts.bids_dir,
        derivatives=derivatives,
        database_path=database_path,
        reset_database=reset_database,
        indexer=indexer,
    )

    subject_list = None
    if opts.participant_label is not None:
        subject_list = fub.collect_participants(
            layout, participant_label=opts.participant_label)

    # Build main workflow
    logger.log(25, INIT_MSG(version=__version__, subject_list=subject_list))

    # TODO: Fix AUTO_MODEL
    # if model == 'default':
    #     models = auto_model(layout)
    # else:
    #     import json
    #     if op.exists(model):
    #         model_dict = json.loads(Path(model).read_text())
    #     models = [model_dict]

    model_dict = None
    if model == 'default':
        raise NotImplementedError(
            "The default model has not been implemented yet.")
    else:
        import json

        if op.exists(model):
            model_dict = json.loads(Path(model).read_text())

    if not model_dict:
        raise ValueError(
            f'model_dict cannot be empty. Invalid model filepath {model}.')

    graph = BIDSStatsModelsGraph(layout, model_dict)

    fitlins_wf = init_fitlins_wf(
        database_path,
        opts.output_dir,
        graph=graph,
        analysis_level=opts.analysis_level,
        model=model,
        space=opts.space,
        desc=opts.desc_label,
        participants=subject_list,
        base_dir=work_dir,
        smoothing=opts.smoothing,
        drop_missing=opts.drop_missing,
        drift_model=opts.drift_model,
        estimator=opts.estimator,
        errorts=opts.error_ts,
    )
    fitlins_wf.config = deepcopy(config.get_fitlins_config()._sections)

    if opts.work_dir:
        # dump crashes in working directory (non /tmp)
        fitlins_wf.config['execution']['crashdump_dir'] = opts.work_dir
    retcode = 0
    if not opts.reports_only:
        try:
            fitlins_wf.run(**plugin_settings)
        except Exception as e:
            logger.critical(f"FitLins failed: {e}")
            raise

    run_context = {
        'version': __version__,
        'command': ' '.join(sys.argv),
        'timestamp': time.strftime('%Y-%m-%d %H:%M:%S %z'),
    }

    selectors = {'desc': opts.desc_label, 'space': opts.space}
    if subject_list is not None:
        selectors['subject'] = subject_list

    graph.load_collections(**selectors)
    report_dict = build_report_dict(opts.output_dir, work_dir, graph)
    write_full_report(report_dict, run_context, opts.output_dir)

    return retcode
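Both list comprehensions near the top implement the same convention: an entry wrapped in slashes is treated as a regular expression, anything else as a literal name. A minimal sketch of that convention factored into a helper (the helper name is hypothetical, not FitLins API):

import re

def _pattern_or_literal(entry):
    # Hypothetical helper: "/sub-0[12]/" becomes a compiled regex,
    # while a bare entry such as "derivatives" stays a literal string.
    if len(entry) > 2 and entry.startswith('/') and entry.endswith('/'):
        return re.compile(entry[1:-1])
    return entry

ignore = [_pattern_or_literal(e) for e in ['/sub-0[12]/', 'derivatives']]
# -> [re.compile('sub-0[12]'), 'derivatives']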
Example #3
    def create_bids_dataframe(self):
        """Generate the dataframe."""

        # Suppress a Future Warning from pybids about leading dot included in 'extension' from version 0.14.0
        # The config_bids.json file used matches the future behavior
        # TODO: when reaching version 0.14.0, remove the following line
        pybids.config.set_option('extension_initial_dot', True)

        for path_data in self.paths_data:
            # Append a trailing separator so the replace() calls below yield dataset-relative paths
            path_data = os.path.join(path_data, '')

            # Initialize BIDSLayoutIndexer and BIDSLayout
            # validate=True by default for both indexer and layout, BIDS-validator is not skipped
            # Force index for samples tsv and json files, and for subject subfolders containing microscopy files based on extensions.
            # Force index of subject subfolders containing CT-scan files under "anat" or "ct" folder based on extensions and modality suffix.
            # TODO: remove force indexing of microscopy files after BEP microscopy is merged in BIDS
            # TODO: remove force indexing of CT-scan files after BEP CT-scan is merged in BIDS
            ext_microscopy = ('.png', '.ome.tif', '.ome.tiff', '.ome.tf2',
                              '.ome.tf8', '.ome.btf')
            ext_ct = ('.nii.gz', '.nii')
            suffix_ct = ('ct', 'CT')
            force_index = []
            for root, dirs, files in os.walk(path_data):
                for file in files:
                    # Path of the current folder relative to the dataset root
                    relative_root = root.replace(path_data, '')
                    # Microscopy
                    if file in ("samples.tsv", "samples.json"):
                        force_index.append(file)
                    if (file.endswith(ext_microscopy)
                            and os.path.basename(root) == "microscopy"
                            and relative_root.startswith("sub")):
                        force_index.append(relative_root)
                    # CT-scan
                    if (file.endswith(ext_ct)
                            and file.split('.')[0].endswith(suffix_ct)
                            and os.path.basename(root) in ("anat", "ct")
                            and relative_root.startswith("sub")):
                        force_index.append(relative_root)
            indexer = pybids.BIDSLayoutIndexer(force_index=force_index)

            if self.derivatives:
                self.write_derivatives_dataset_description(path_data)

            layout = pybids.BIDSLayout(path_data,
                                       config=self.bids_config,
                                       indexer=indexer,
                                       derivatives=self.derivatives)

            # Transform layout to dataframe with all entities and json metadata
            # As per pybids, derivatives don't include parsed entities, only the "path" column
            df_next = layout.to_df(metadata=True)

            # Add filename column
            df_next.insert(1, 'filename',
                           df_next['path'].apply(os.path.basename))

            # Drop rows with json, tsv and LICENSE files in case no extensions are provided in config file for filtering
            df_next = df_next[~df_next['filename'].str.endswith(('.json', '.tsv', 'LICENSE'))]

            # Update dataframe with subject files of chosen contrasts and extensions,
            # and with derivative files of chosen target_suffix from loader parameters
            df_next = df_next[
                (~df_next['path'].str.contains('derivatives')
                 & df_next['suffix'].str.contains('|'.join(self.contrast_lst))
                 & df_next['extension'].str.contains('|'.join(self.extensions)))
                | (df_next['path'].str.contains('derivatives')
                   & df_next['filename'].str.contains('|'.join(self.target_suffix)))]

            if df_next[~df_next['path'].str.contains('derivatives')].empty:
                # Warning if no subject files are found in path_data
                logger.warning(
                    "No subject files were found in '{}' dataset. Skipping dataset."
                    .format(path_data))

            else:
                # Add tsv files metadata to dataframe
                df_next = self.add_tsv_metadata(df_next, path_data, layout)

                # TODO: check if other files are needed for EEG and DWI

                # Merge dataframes
                self.df = pd.concat([self.df, df_next],
                                    join='outer',
                                    ignore_index=True)

        if self.df.empty:
            # Raise error and exit if no subject files are found in any path data
            raise RuntimeError(
                "No subject files found. Check selection of parameters in config.json"
                " and datasets compliance with BIDS specification.")

        # Drop duplicated rows based on all columns except 'path'
        # Keep first occurrence
        columns = self.df.columns.to_list()
        columns.remove('path')
        self.df = self.df[~self.df.astype(str).duplicated(subset=columns, keep='first')]

        # If indexing of derivatives is true
        if self.derivatives:

            # Get list of subject files with available derivatives
            has_deriv, deriv = self.get_subjects_with_derivatives()

            # Filter dataframe to keep subjects files with available derivatives only
            if has_deriv:
                self.df = self.df[
                    self.df['filename'].str.contains('|'.join(has_deriv))
                    | self.df['filename'].str.contains('|'.join(deriv))]
            else:
                # Raise error and exit if no derivatives are found for any subject files
                raise RuntimeError("Derivatives not found.")

        # Reset index
        self.df.reset_index(drop=True, inplace=True)

        # Drop columns with all null values
        self.df.dropna(axis=1, inplace=True, how='all')
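Both versions of this method exist because force_index is how pybids is told to index files the BIDS validator would otherwise skip. A stripped-down sketch of the resulting call, assuming import bids as pybids (as in the snippets) and a hypothetical dataset path:

import bids as pybids

# Bare filenames and dataset-relative folders can both be forced into the index
force_index = ['samples.tsv', 'samples.json', 'sub-01/microscopy']
indexer = pybids.BIDSLayoutIndexer(force_index=force_index)
layout = pybids.BIDSLayout('/path/to/dataset',  # hypothetical dataset root
                           indexer=indexer)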
Example #4
    def create_bids_dataframe(self):
        """Generate the dataframe."""

        for path_data in self.paths_data:
            path_data = Path(path_data)

            # Initialize BIDSLayoutIndexer and BIDSLayout
            # validate=True by default for both indexer and layout, BIDS-validator is not skipped
            # Force index for samples tsv and json files, and for subject subfolders containing microscopy files based on extensions.
            # Force index of subject subfolders containing CT-scan files under "anat" or "ct" folder based on extensions and modality suffix.
            # TODO: remove force indexing of microscopy files after Microscopy-BIDS is integrated in pybids
            # TODO: remove force indexing of CT-scan files after BEP CT-scan is merged in BIDS
            ext_microscopy = ('.png', '.tif', '.ome.tif', '.ome.btf')
            ext_ct = ('.nii.gz', '.nii')
            suffix_ct = ('ct', 'CT')
            force_index = []
            for path_object in path_data.glob('**/*'):
                if path_object.is_file():
                    # Index of the first path component below the dataset root (e.g. 'sub-01')
                    subject_path_index = len(path_data.parts)
                    subject_path = path_object.parts[subject_path_index]
                    # Microscopy
                    if path_object.name in ("samples.tsv", "samples.json"):
                        force_index.append(path_object.name)
                    if (path_object.name.endswith(ext_microscopy)
                            and path_object.parent.name == "micr"
                            and subject_path.startswith('sub')):
                        force_index.append(str(Path(*path_object.parent.parts[subject_path_index:])))
                    # CT-scan
                    if (path_object.name.endswith(ext_ct)
                            and path_object.name.split('.')[0].endswith(suffix_ct)
                            and path_object.parent.name in ("anat", "ct")
                            and subject_path.startswith('sub')):
                        force_index.append(str(Path(*path_object.parent.parts[subject_path_index:])))
            indexer = pybids.BIDSLayoutIndexer(force_index=force_index, validate=self.bids_validate)

            if self.derivatives:
                self.write_derivatives_dataset_description(path_data)

            layout = pybids.BIDSLayout(str(path_data), config=self.bids_config, indexer=indexer,
                                       derivatives=self.derivatives)

            # Transform layout to dataframe with all entities and json metadata
            # As per pybids, derivatives don't include parsed entities, only the "path" column
            df_next = layout.to_df(metadata=True)

            # Add filename column
            df_next.insert(1, 'filename', df_next['path'].apply(os.path.basename))

            # Drop rows with json, tsv and LICENSE files in case no extensions are provided in config file for filtering
            df_next = df_next[~df_next['filename'].str.endswith(tuple(['.json', '.tsv', 'LICENSE']))]

            # The following command updates the dataframe by doing 2 things:
            # 1. Keep only subject files of chosen contrasts (for files that are not in the 'derivatives' folder)
            #    (ex: '<dataset_path>/sub-XX/anat/sub-XX_T1w.nii.gz' with contrast_lst:["T1w"])
            # 2. Keep only derivatives files of chosen target_suffix (for files that are in the 'derivatives' folder)
            #    (ex: '<dataset_path>/derivatives/labels/sub-XX/anat/sub-XX_T1w_seg-manual.nii.gz' with target_suffix:["_seg-manual"])
            df_next = df_next[(~df_next['path'].str.contains('derivatives')
                               & df_next['suffix'].str.contains('|'.join(self.contrast_lst)))
                              | (df_next['path'].str.contains('derivatives')
                                 & df_next['filename'].str.split('.').apply(lambda x: x[0]).str.endswith(tuple(self.target_suffix)))]

            # Update dataframe with files of chosen extensions
            df_next = df_next[df_next['filename'].str.endswith(tuple(self.extensions))]

            # Warning if no subject files are found in path_data
            if df_next[~df_next['path'].str.contains('derivatives')].empty:
                logger.warning(f"No subject files were found in '{path_data}' dataset. Skipping dataset.")
            else:
                # Add tsv files metadata to dataframe
                df_next = self.add_tsv_metadata(df_next, path_data, layout)

                # TODO: check if other files are needed for EEG and DWI

                # Merge dataframes
                self.df = pd.concat([self.df, df_next], join='outer', ignore_index=True)

        if self.df.empty:
            # Raise error and exit if no subject files are found in any path data
            raise RuntimeError("No subject files found. Check selection of parameters in config.json"
                               " and datasets compliance with BIDS specification.")

        # Drop duplicated rows based on all columns except 'path'
        # Keep first occurrence
        columns = self.df.columns.to_list()
        columns.remove('path')
        self.df = self.df[~(self.df.astype(str).duplicated(subset=columns, keep='first'))]

        # Remove subject files without the "split_method" metadata if specified and keep all derivatives
        if self.split_method:
            files_remove = (self.df[(
                                # Path does not contain the derivatives string (i.e. only raw subject data files are targeted)
                                ~self.df['path'].str.contains('derivatives')
                                # and the split_method metadata is null (i.e. subjects without it are excluded)
                                & self.df[self.split_method].isnull())]
                                # Get these filenames and convert to a list
                                ['filename']).tolist()
            if files_remove:
                logger.warning(f"The following files don't have the '{self.split_method}' metadata indicated as the "
                               f"split_method in the configuration JSON file. Skipping these files: {files_remove}")
                # Remove from the dataframe all rows whose filename matches any entry in files_remove
                self.df = self.df[~self.df['filename'].str.contains('|'.join(files_remove))]

        # If indexing of derivatives is true
        if self.derivatives:

            # Get list of subject files with available derivatives
            has_deriv, deriv = self.get_subjects_with_derivatives()

            # Filter dataframe to keep subjects files with available derivatives only
            if has_deriv:
                self.df = self.df[self.df['filename'].str.contains('|'.join(has_deriv))
                                  | self.df['filename'].str.contains('|'.join(deriv))]
            else:
                # Raise error and exit if no derivatives are found for any subject files
                raise RuntimeError("Derivatives not found.")

        # Reset index
        self.df.reset_index(drop=True, inplace=True)

        # Drop columns with all null values
        self.df.dropna(axis=1, inplace=True, how='all')
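The pathlib version derives dataset-relative paths by slicing parts. A worked example with hypothetical paths, showing what each slice yields:

from pathlib import Path

path_data = Path('/data/my_dataset')
path_object = Path('/data/my_dataset/sub-01/micr/sub-01_sample-01_SEM.png')

# Index of the first component below the dataset root
subject_path_index = len(path_data.parts)              # 3: ('/', 'data', 'my_dataset')
subject_path = path_object.parts[subject_path_index]   # 'sub-01'
# Dataset-relative parent folder, as appended to force_index
rel_parent = Path(*path_object.parent.parts[subject_path_index:])
print(subject_path, rel_parent)                        # sub-01 sub-01/micr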
Example #5
def create_bids_dataframe(loader_params, derivatives):
    """Create a dataframe containing all BIDS image files in a bids_path and their metadata.

    Args:
        loader_params (dict): Loader parameters, see :doc:`configuration_file` for more details.
        derivatives (bool): If True, derivatives are indexed.

    Returns:
        df (pd.DataFrame): Dataframe containing all BIDS image files indexed and their metadata.
    """

    # Get bids_path, bids_config, target_suffix, extensions and contrast_lst from loader parameters
    bids_path = loader_params['bids_path']
    bids_config = loader_params.get('bids_config')
    target_suffix = loader_params['target_suffix']
    # If `target_suffix` is a list of lists, flatten it to a single flat list
    if any(isinstance(t, list) for t in target_suffix):
        target_suffix = list(itertools.chain.from_iterable(target_suffix))
    extensions = loader_params['extensions']
    contrast_lst = loader_params["contrast_params"]["contrast_lst"]

    # Suppress a Future Warning from pybids about leading dot included in 'extension' from version 0.14.0
    # The config_bids.json file used matches the future behavior
    # TODO: when reaching version 0.14.0, remove the following line
    pybids.config.set_option('extension_initial_dot', True)

    # Initialize BIDSLayoutIndexer and BIDSLayout
    # validate=True by default for both indexer and layout, BIDS-validator is not skipped
    # Force index for samples tsv and json files, and for subject subfolders containing microscopy files based on extensions.
    # TODO: remove force indexing of microscopy files after BEP microscopy is merged in BIDS
    ext_microscopy = ('.png', '.ome.tif', '.ome.tiff', '.ome.tf2', '.ome.tf8',
                      '.ome.btf')
    force_index = ['samples.tsv', 'samples.json']
    if not bids_path.endswith("/"):
        bids_path = bids_path + "/"
    for root, dirs, files in os.walk(bids_path):
        for file in files:
            if file.endswith(ext_microscopy) and (root.replace(
                    bids_path, '').startswith("sub")):
                force_index.append(os.path.join(root.replace(bids_path, '')))
    indexer = pybids.BIDSLayoutIndexer(force_index=force_index)
    layout = pybids.BIDSLayout(bids_path,
                               config=bids_config,
                               indexer=indexer,
                               derivatives=derivatives)

    # Transform layout to dataframe with all entities and json metadata
    # As per pybids, derivatives don't include parsed entities, only the "path" column
    df = layout.to_df(metadata=True)

    # Add filename and parent_path columns
    df['filename'] = df['path'].apply(os.path.basename)
    df['parent_path'] = df['path'].apply(os.path.dirname)

    # Drop rows with json, tsv and LICENSE files in case no extensions are provided in config file for filtering
    df = df[~df['filename'].str.endswith(('.json', '.tsv', 'LICENSE'))]

    # Update dataframe with subject files of chosen contrasts and extensions,
    # and with derivative files of chosen target_suffix from loader parameters
    df = df[(~df['path'].str.contains('derivatives')
             & df['suffix'].str.contains('|'.join(contrast_lst))
             & df['extension'].str.contains('|'.join(extensions))) |
            (df['path'].str.contains('derivatives')
             & df['filename'].str.contains('|'.join(target_suffix)))]

    # Add metadata from participants.tsv file, if present
    # Uses pybids function
    if layout.get_collections(level='dataset'):
        df_participants = layout.get_collections(level='dataset',
                                                 merge=True).to_df()
        df_participants.drop(['suffix'], axis=1, inplace=True)
        df = pd.merge(df,
                      df_participants,
                      on='subject',
                      suffixes=("_x", None),
                      how='left')

    # Add metadata from samples.tsv file, if present
    # TODO: use pybids function after BEP microscopy is merged in BIDS
    fname_samples = os.path.join(bids_path, "samples.tsv")
    if os.path.exists(fname_samples):
        df_samples = pd.read_csv(fname_samples, sep='\t')
        df['participant_id'] = "sub-" + df['subject']
        df['sample_id'] = "sample-" + df['sample']
        df = pd.merge(df,
                      df_samples,
                      on=['participant_id', 'sample_id'],
                      suffixes=("_x", None),
                      how='left')
        df.drop(['participant_id', 'sample_id'], axis=1, inplace=True)

    # Add metadata from all _sessions.tsv files, if present
    # Uses pybids function
    if layout.get_collections(level='subject'):
        df_sessions = layout.get_collections(level='subject',
                                             merge=True).to_df()
        df_sessions.drop(['suffix'], axis=1, inplace=True)
        df = pd.merge(df,
                      df_sessions,
                      on=['subject', 'session'],
                      suffixes=("_x", None),
                      how='left')

    # Add metadata from all _scans.tsv files, if present
    # TODO: use pybids function after BEP microscopy is merged in BIDS
    # TODO: verify merge behavior with EEG and DWI scans files, tested with anat and microscopy only
    df_scans = pd.DataFrame()
    for root, dirs, files in os.walk(bids_path):
        for file in files:
            if file.endswith("scans.tsv"):
                df_temp = pd.read_csv(os.path.join(root, file), sep='\t')
                df_scans = pd.concat([df_scans, df_temp], ignore_index=True)
    if not df_scans.empty:
        df_scans['filename'] = df_scans['filename'].apply(os.path.basename)
        df = pd.merge(df,
                      df_scans,
                      on=['filename'],
                      suffixes=("_x", None),
                      how='left')

    # TODO: check if other files are needed for EEG and DWI

    # If indexing of derivatives is true
    # Get list of subject files with available derivatives
    if derivatives:
        subject_files = df[~df['path'].str.contains('derivatives')]['filename'].to_list()
        prefix_fnames = [s.split('.')[0] for s in subject_files]
        deriv = df[df['path'].str.contains('derivatives')]['filename'].tolist()
        has_deriv = []
        for p in prefix_fnames:
            available = [d for d in deriv if p in d]
            if available:
                has_deriv.append(p)
                for t in target_suffix:
                    if t not in str(available):
                        logger.warning("Missing target_suffix {} for subject {}.".format(t, p))
            else:
                logger.warning("Missing derivatives for subject {}. Skipping subject.".format(p))

        # Filter dataframe to keep subjects files with available derivatives only
        if has_deriv:
            df = df[df['filename'].str.contains('|'.join(has_deriv))]
        else:
            # Raise error and exit if no derivatives are found for any subject files
            raise RuntimeError("Derivatives not found.")

    # Reset index
    df.reset_index(drop=True, inplace=True)

    # Drop columns with all null values
    df.dropna(axis=1, inplace=True, how='all')

    return df
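Every metadata merge in this function follows the same left-join pattern: normalize the join key, then pd.merge with how='left' so image rows are never dropped. A self-contained sketch of the _scans.tsv step with toy data (all values are hypothetical):

import os
import pandas as pd

df = pd.DataFrame({'filename': ['sub-01_T1w.nii.gz'], 'subject': ['01']})
df_scans = pd.DataFrame({'filename': ['anat/sub-01_T1w.nii.gz'],
                         'acq_time': ['2021-01-01T10:00:00']})

# scans.tsv stores paths relative to the subject folder; keep only the
# basename so it lines up with the layout dataframe's 'filename' column.
df_scans['filename'] = df_scans['filename'].apply(os.path.basename)
df = pd.merge(df, df_scans, on=['filename'], suffixes=("_x", None), how='left')
print(df)  # the acq_time column is now attached to the matching image row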