Example #1
from pkg_resources import resource_filename
from bids import config as bids_config
from bids import layout


def get_BIDSLayout_with_conf(dir_, **kwargs):
    """Get BIDSLayout with bids, derivatives, and pndni_bids configuration files loaded"""
    # Register the pndni_bids entity configuration once per process
    if "pndni_bids" not in bids_config.get_option("config_paths"):
        layout.add_config_paths(pndni_bids=resource_filename(
            'pndniworkflows', 'config/pndni_bids.json'))
    return layout.BIDSLayout(dir_,
                             config=['bids', 'derivatives', 'pndni_bids'],
                             **kwargs)
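
A minimal usage sketch (the dataset path is hypothetical; any extra keyword arguments are forwarded to BIDSLayout):

# '/data/bids' is a placeholder for a real BIDS dataset root
bl = get_BIDSLayout_with_conf('/data/bids')
files = bl.get(subject='01', return_type='file')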
Example #2
import pytest
from os.path import join
from bids.layout import BIDSLayout
from bids.tests import get_test_data_path
from bids.variables import load_variables
from bids.config import get_option, set_option


# Assumed decorator: parametrizes over 'events' and 'preproc'
@pytest.fixture(params=['events', 'preproc'])
def synthetic(request):
    root = join(get_test_data_path(), 'synthetic')
    default_preproc = get_option('loop_preproc')
    if request.param == 'preproc':
        set_option('loop_preproc', True)
        layout = BIDSLayout((root, ['bids', 'derivatives']))
    else:
        set_option('loop_preproc', default_preproc)
        layout = BIDSLayout(root, exclude='derivatives')
    yield request.param, load_variables(layout, skip_empty=True)
    set_option('loop_preproc', default_preproc)
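
A hypothetical test consuming this fixture (pytest injects it by name; the fixture yields the parameter and the loaded variables):

def test_synthetic(synthetic):
    param, variables = synthetic
    assert param in ('events', 'preproc')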
Example #3
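    # Method of the BIDSLayout class; assumes module-level imports of os,
    # json, six, BIDSValidator, MetadataIndex, listify, and get_option,
    # with grabbit's Layout as the superclass.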
    def __init__(self,
                 root,
                 validate=True,
                 index_associated=True,
                 include=None,
                 absolute_paths=True,
                 derivatives=False,
                 config=None,
                 sources=None,
                 **kwargs):

        self.validator = BIDSValidator(index_associated=index_associated)
        self.validate = validate
        self.metadata_index = MetadataIndex(self)
        self.derivatives = {}
        self.sources = listify(sources)

        # Validate arguments
        if not isinstance(root, six.string_types):
            raise ValueError("root argument must be a string specifying the"
                             " directory containing the BIDS dataset.")
        if not os.path.exists(root):
            raise ValueError("BIDS root does not exist: %s" % root)

        self.root = root

        target = os.path.join(self.root, 'dataset_description.json')
        if not os.path.exists(target):
            if validate is True:
                raise ValueError(
                    "'dataset_description.json' is missing from project root."
                    " Every valid BIDS dataset must have this file.")
            else:
                self.description = None
        else:
            with open(target, 'r', encoding='utf-8') as desc_fd:
                self.description = json.load(desc_fd)
            if validate is True:
                for k in ['Name', 'BIDSVersion']:
                    if k not in self.description:
                        raise ValueError("Mandatory '%s' field missing from "
                                         "dataset_description.json." % k)

        # Determine which subdirectories to exclude from indexing
        excludes = {"code", "stimuli", "sourcedata", "models", "derivatives"}
        if include is not None:
            include = listify(include)
            if "derivatives" in include:
                raise ValueError("Do not pass 'derivatives' in the include "
                                 "list. To index derivatives, either set "
                                 "derivatives=True, or use add_derivatives().")
            excludes -= set([d.strip(os.path.sep) for d in include])
        self._exclude_dirs = list(excludes)

        # Set up path and config for grabbit
        if config is None:
            config = 'bids'
        config_paths = get_option('config_paths')
        path = (root, [config_paths[c] for c in listify(config)])

        # Initialize grabbit Layout
        super(BIDSLayout, self).__init__(path,
                                         root=self.root,
                                         dynamic_getters=True,
                                         absolute_paths=absolute_paths,
                                         **kwargs)

        # Add derivatives if any are found; self.derivatives was already
        # initialized above
        if derivatives:
            if derivatives is True:
                derivatives = os.path.join(root, 'derivatives')
            self.add_derivatives(derivatives,
                                 validate=validate,
                                 index_associated=index_associated,
                                 include=include,
                                 absolute_paths=absolute_paths,
                                 derivatives=None,
                                 config=None,
                                 sources=self,
                                 **kwargs)
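
A construction sketch matching this signature (the path is hypothetical; derivatives=True indexes <root>/derivatives alongside the raw dataset):

layout = BIDSLayout('/data/bids', validate=True, derivatives=True)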
Example #4
# Imports assumed from the surrounding pybids module:
import warnings

import nibabel as nb
import numpy as np
import pandas as pd

from bids.config import get_option
from bids.variables.entities import NodeIndex, BASE_ENTITIES
from bids.variables.variables import SparseRunVariable, DenseRunVariable


def _load_time_variables(layout,
                         dataset=None,
                         columns=None,
                         scan_length=None,
                         drop_na=True,
                         events=True,
                         physio=True,
                         stim=True,
                         confounds=True,
                         skip_empty=True,
                         **selectors):
    ''' Loads all time-series variables (events, confounds, and physio/stim
    recordings) associated with functional runs and returns them in a
    NodeIndex.

    Args:
        layout (BIDSLayout): A BIDSLayout to scan.
        dataset (NodeIndex): A BIDS NodeIndex container. If None, a new one is
            initialized.
        columns (list): Optional list of names specifying which columns in the
            event files to read. By default, reads all columns found.
        scan_length (float): Optional duration of runs (in seconds). By
            default, this will be extracted from the BOLD image. However, in
            cases where the user doesn't have access to the images (e.g.,
            because only file handles are locally available), a fixed duration
            can be manually specified as a fallback.
        drop_na (bool): If True, removes all events where amplitude is n/a. If
            False, leaves n/a values intact. Note that in the latter case,
            transformations that require numeric values may fail.
        events (bool): If True, extracts variables from events.tsv
            files.
        physio (bool): If True, extracts variables from _physio files.
        stim (bool): If True, extracts variables from _stim files.
        confounds (bool): If True, extracts variables from _confounds files.
        skip_empty (bool): Whether or not to skip empty Variables (i.e.,
            where there are no rows/records in a file, or all onsets,
            durations, and amplitudes are 0).
        selectors (dict): Optional keyword arguments passed onto the
            BIDSLayout instance's get() method; can be used to constrain
            which data are loaded.

    Returns: A NodeIndex instance.
    '''

    # Pull out the non-entity 'domains' argument before filtering selectors
    kwargs = selectors.copy()
    domains = kwargs.get('domains', None)

    # Filter keyword args
    selectors = {k: v for k, v in selectors.items() if k in BASE_ENTITIES}

    if dataset is None:
        dataset = NodeIndex()

    if get_option('loop_preproc'):
        selectors['type'] = 'preproc'
        # Select a single space, so each run is looped over only once
        # Warning: If some spaces only apply to some runs, this may result in
        # unexpected behavior, although this scenario is rare.
        spaces = layout.get_spaces(type='preproc')
        if len(spaces) > 1:
            selectors['space'] = spaces[0]

    else:
        selectors['modality'] = 'func'
        selectors['type'] = 'bold'

    images = layout.get(return_type='file', extensions='.nii.gz', **selectors)

    if not images:
        raise ValueError("No functional images that match criteria found.")

    # Main loop over images
    for img_f in images:

        entities = layout.files[img_f].entities

        # Run is not a mandatory entity; cast existing values to int for
        # consistent indexing
        if 'run' in entities:
            entities['run'] = int(entities['run'])

        # Get duration of run: first try to read it from the image header;
        # if that fails, fall back to the scan_length argument.
        try:
            img = nb.load(img_f)
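            # e.g., 200 volumes at a 2.0 s sampling interval -> a 400 s run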
            duration = img.shape[3] * img.header.get_zooms()[-1]
        except Exception:
            if scan_length is not None:
                duration = scan_length
            else:
                msg = ("Unable to extract scan duration from one or more "
                       "BOLD runs, and no scan_length argument was provided "
                       "as a fallback. Please check that the image files are "
                       "available, or manually specify the scan duration.")
                raise ValueError(msg)

        tr = layout.get_metadata(img_f,
                                 type='bold',
                                 domains=domains,
                                 full_search=True)['RepetitionTime']

        run = dataset.get_or_create_node('run',
                                         entities,
                                         image_file=img_f,
                                         duration=duration,
                                         repetition_time=tr)
        run_info = run.get_info()

        # Process event files
        if events:
            dfs = layout._get_nearest_helper(img_f,
                                             '.tsv',
                                             type='events',
                                             full_search=True,
                                             domains=domains)
            if dfs is not None:
                for _file in dfs:
                    _data = pd.read_csv(_file, sep='\t')
                    if 'amplitude' in _data.columns:
                        if (_data['amplitude'].astype(int) == 1).all() and \
                                'trial_type' in _data.columns:
                            msg = ("Column 'amplitude' with constant value 1 "
                                   "is unnecessary in event files; ignoring "
                                   "it.")
                            _data = _data.drop('amplitude', axis=1)
                        else:
                            msg = ("Column name 'amplitude' is reserved; "
                                   "renaming "
                                   "it to 'amplitude_'.")
                            _data = _data.rename(
                                columns={'amplitude': 'amplitude_'})
                        warnings.warn(msg)

                    _data = _data.replace('n/a', np.nan)  # Replace BIDS' n/a
                    _data = _data.apply(pd.to_numeric, errors='ignore')

                    _cols = columns or list(
                        set(_data.columns.tolist()) - {'onset', 'duration'})

                    # Construct a DataFrame for each extra column
                    for col in _cols:
                        df = _data[['onset', 'duration']].copy()
                        df['amplitude'] = _data[col].values

                        # Add in all of the run's entities as new columns for
                        # index
                        for entity, value in entities.items():
                            if entity in BASE_ENTITIES:
                                df[entity] = value

                        if drop_na:
                            df = df.dropna(subset=['amplitude'])

                        if df.empty:
                            continue

                        var = SparseRunVariable(col, df, run_info, 'events')
                        run.add_variable(var)

        # Process confound files
        if confounds:
            sub_ents = {
                k: v
                for k, v in entities.items() if k in BASE_ENTITIES
            }
            confound_files = layout.get(type='confounds', **sub_ents)
            for cf in confound_files:
                _data = pd.read_csv(cf.filename, sep='\t', na_values='n/a')
                if columns is not None:
                    conf_cols = list(set(_data.columns) & set(columns))
                    _data = _data.loc[:, conf_cols]
                for col in _data.columns:
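                    # Dense sampling rate is 1/TR, e.g. TR=2.0 s -> 0.5 Hz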
                    sr = 1. / run.repetition_time
                    var = DenseRunVariable(col, _data[[col]], run_info,
                                           'confounds', sr)
                    run.add_variable(var)

        # Process recording files
        if physio or stim:
            rec_types = ['physio'] if physio else []
            if stim:
                rec_types.append('stim')
            rec_files = layout.get_nearest(img_f,
                                           extensions='.tsv.gz',
                                           all_=True,
                                           type=rec_types,
                                           ignore_strict_entities=['type'],
                                           full_search=True,
                                           domains=domains)
            for rf in rec_files:
                metadata = layout.get_metadata(rf)
                if not metadata:
                    raise ValueError("No .json sidecar found for '%s'." % rf)
                data = pd.read_csv(rf, sep='\t')
                freq = metadata['SamplingFrequency']
                st = metadata['StartTime']
                rf_cols = metadata['Columns']
                data.columns = rf_cols

                # Filter columns if user passed names
                if columns is not None:
                    rf_cols = list(set(rf_cols) & set(columns))
                    data = data.loc[:, rf_cols]

                n_cols = len(rf_cols)
                if not n_cols:
                    continue

                # Keep only in-scan samples: a negative StartTime means the
                # recording began before the scan, so trim the pre-scan rows
                if st < 0:
                    start_ind = int(np.floor(-st * freq))
                    values = data.values[start_ind:, :]
                else:
                    values = data.values

                # A positive StartTime means the recording began after scan
                # onset, so pad the beginning with zeros
                if st > 0:
                    n_pad = int(freq * st)
                    pad = np.zeros((n_pad, n_cols))
                    values = np.r_[pad, values]

                n_rows = int(run.duration * freq)
                if len(values) > n_rows:
                    values = values[:n_rows, :]
                elif len(values) < n_rows:
                    pad = np.zeros((n_rows - len(values), n_cols))
                    values = np.r_[values, pad]

                df = pd.DataFrame(values, columns=rf_cols)
                source = 'physio' if '_physio.tsv' in rf else 'stim'
                for col in df.columns:
                    var = DenseRunVariable(col, df[[col]], run_info, source,
                                           freq)
                    run.add_variable(var)
    return dataset
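
A minimal invocation sketch (path and entity filter are hypothetical; in practice this function is typically reached through load_variables):

layout = BIDSLayout('/data/bids')
index = _load_time_variables(layout, scan_length=480.0, subject='01')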