def match_variables(self, pattern, return_type='name', match_type='unix'):
        """Return columns whose names match the provided pattern.

        Parameters
        ----------
        pattern : str, list
            One or more regex patterns to match all variable names against.
        return_type : {'name', 'variable'}
            What to return. Must be one of:
            'name': Returns a list of names of matching variables.
            'variable': Returns a list of Variable objects whose names
            match.
        match_type : str
            Matching approach to use. Either 'regex' (full-blown regular
                expression matching) or 'unix' (unix-style pattern matching
                via the fnmatch module).

        Returns
        -------
        A list of all matching variables or variable names
        """
        pattern = listify(pattern)
        results = []
        for patt in pattern:
            if match_type.lower().startswith('re'):
                patt = re.compile(patt)
                vars_ = [v for v in self.variables.keys() if patt.search(v)]
            else:
                vars_ = fnmatch.filter(list(self.variables.keys()), patt)
            if return_type.startswith('var'):
                vars_ = [self.variables[v] for v in vars_]
            results.extend(vars_)
        return results
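A minimal standalone sketch of the two matching modes used above, assuming a plain list of names (the names below are made up) and pure-stdlib calls only:

import fnmatch
import re

names = ['RT', 'RT_log', 'trial_type', 'stim_duration']

# unix-style matching via fnmatch (the 'unix' branch above)
unix_hits = fnmatch.filter(names, 'RT*')           # ['RT', 'RT_log']

# full regular-expression matching (the 'regex' branch above)
patt = re.compile('^RT')
regex_hits = [n for n in names if patt.search(n)]  # ['RT', 'RT_log']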
Example #2
File: munge.py Project: INCF/pybids
    def _transform(self, var, query, by=None):

        if by is None:
            by = []

        names = [var.name] + listify(by)

        # assure ordered dict so we have consistent (if not correct) operation,
        # because later we ask for name_map.values
        # pandas .query can't handle non-identifiers in variable names, so we
        # need to replace them in both the variable names and the query string.
        name_map = odict((n, re.sub('[^a-zA-Z0-9_]+', '_', n)) for n in names)
        for k, v in name_map.items():
            query = query.replace(k, v)

        data = pd.concat([self.collection[n].values for n in names],
                         axis=1, sort=True)
        # Make sure we can use integer index
        data = data.reset_index(drop=True)
        data.columns = list(name_map.values())
        data = data.query(query)

        # Truncate target variable to retained rows
        var.select_rows(data.index.values)

        return var
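As a rough illustration of the name-sanitizing step above (plain pandas, not pybids API): column names containing non-identifier characters must be rewritten before DataFrame.query will accept them.

import re
import pandas as pd

# Toy frame with a non-identifier column name
df = pd.DataFrame({'reaction time': [0.3, 0.9, 1.5], 'accuracy': [1, 0, 1]})
name_map = {n: re.sub('[^a-zA-Z0-9_]+', '_', n) for n in df.columns}
df.columns = list(name_map.values())

kept = df.query('reaction_time > 0.5')   # rows with reaction_time 0.9 and 1.5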
Example #3
    def merge(cls, variables, name=None, **kwargs):
        ''' Merge/concatenate a list of variables along the row axis.

        Args:
            variables (list): A list of Variables to merge.
            name (str): Optional name to assign to the output Variable. By
                default, uses the same name as the input variables.
            kwargs: Optional keyword arguments to pass onto the class-specific
                merge() call. See merge_variables docstring for details.

        Returns:
            A single BIDSVariable of the same class as the input variables.

        Notes: see merge_variables docstring for additional details.
        '''

        variables = listify(variables)
        if len(variables) == 1:
            return variables[0]

        var_names = set([v.name for v in variables])
        if len(var_names) > 1:
            raise ValueError("Columns with different names cannot be merged. "
                             "Column names provided: %s" % var_names)

        if name is None:
            name = variables[0].name

        return cls._merge(variables, name, **kwargs)
Example #4
    def _regex_replace_variables(self, args):
        """For each argument named in args, interpret the values set in the
        argument as regex patterns to potentially be replaced with variables
        that match the pattern. """

        args = listify(args)

        if 'variables' in args:
            args.remove('variables')
            variables = True
        else:
            variables = False

        # Ensure all keyword arguments user wants to scan are valid
        missing = set(args) - set(self.kwargs.keys())
        if missing:
            raise ValueError("Arguments '%s' specified for regex-based "
                             "variable name replacement, but were not found "
                             "among keyword arguments." % missing)

        def _replace_arg_values(names):
            variables = listify(names)
            variables = [self.collection.match_variables(c) for c in names]
            variables = itertools.chain(*variables)
            return list(set(variables))

        # 'variables' is stored separately, so handle it separately
        if variables:
            self.variables = _replace_arg_values(self.variables)

        for arg in args:
            self.kwargs[arg] = _replace_arg_values(self.kwargs[arg])
Example #5
    def _transform(self, var, query, by=None):

        if by is None:
            by = []

        names = [var.name] + listify(by)

        # assure ordered dict so we have consistent (if not correct) operation,
        # because later we ask for name_map.values
        # pandas .query can't handle non-identifiers in variable names, so we
        # need to replace them in both the variable names and the query string.
        name_map = odict((n, re.sub('[^a-zA-Z0-9_]+', '_', n)) for n in names)
        for k, v in name_map.items():
            query = query.replace(k, v)

        data = pd.concat([self.collection[n].values for n in names],
                         axis=1,
                         sort=True)
        # Make sure we can use integer index
        data = data.reset_index(drop=True)
        data.columns = list(name_map.values())
        data = data.query(query)

        # Truncate target variable to retained rows
        var.select_rows(data.index.values)

        return var
Example #6
def merge_collections(collections, force_dense=False, sampling_rate='auto'):
    ''' Merge two or more collections at the same level of analysis.

    Args:
        collections (list): List of Collections to merge.
        sampling_rate (int, str): Sampling rate to use if it becomes necessary
            to resample DenseRunVariables. Either an integer or 'auto' (see
            merge_variables docstring for further explanation).

    Returns:
        A BIDSVariableCollection or BIDSRunVariableCollection, depending
        on the type of the input collections.
    '''
    if len(listify(collections)) == 1:
        return collections

    levels = set([c.level for c in collections])
    if len(levels) > 1:
        raise ValueError("At the moment, it's only possible to merge "
                         "Collections at the same level of analysis. You "
                         "passed collections at levels: %s." % levels)

    variables = list(chain(*[c.variables.values() for c in collections]))
    cls = collections[0].__class__

    variables = cls.merge_variables(variables, sampling_rate=sampling_rate)

    if isinstance(collections[0], BIDSRunVariableCollection):
        return cls(variables, sampling_rate)

    return cls(variables)
Example #7
    def _transform(self, var, query, by=None):

        if by is None:
            by = []

        names = [var.name] + listify(by)

        # pandas .query can't handle non-identifiers in variable names, so we
        # need to replace them in both the variable names and the query string.
        name_map = {n: re.sub('[^a-zA-Z0-9_]+', '_', n) for n in names}
        for k, v in name_map.items():
            query = query.replace(k, v)

        data = pd.concat([self.collection[c].values for c in names],
                         axis=1,
                         sort=True)
        # Make sure we can use integer index
        data = data.reset_index(drop=True)
        data.columns = list(name_map.values())
        data = data.query(query)

        # Truncate target variable to retained rows
        var.select_rows(data.index.values)

        return var
Example #8
    def merge(cls, variables, name=None, **kwargs):
        ''' Merge/concatenate a list of variables along the row axis.

        Args:
            variables (list): A list of Variables to merge.
            name (str): Optional name to assign to the output Variable. By
                default, uses the same name as the input variables.
            kwargs: Optional keyword arguments to pass onto the class-specific
                merge() call. See merge_variables docstring for details.

        Returns:
            A single BIDSVariable of the same class as the input variables.

        Notes: see merge_variables docstring for additional details.
        '''

        variables = listify(variables)
        if len(variables) == 1:
            return variables[0]

        var_names = set([v.name for v in variables])
        if len(var_names) > 1:
            raise ValueError("Columns with different names cannot be merged. "
                             "Column names provided: %s" % var_names)

        if name is None:
            name = variables[0].name

        return cls._merge(variables, name, **kwargs)
Example #9
    def _transform(self, var, by, drop_orig=True):

        if not isinstance(var, SimpleVariable):
            self._densify_variables()

        # Set up all the splitting variables as a DF. Note that variables in
        # 'by' can be either regular variables, or entities in the index--so
        # we need to check both places.
        all_variables = self._variables
        by_variables = [
            all_variables[v].values
            if v in all_variables else var.index[v].reset_index(drop=True)
            for v in listify(by)
        ]
        group_data = pd.concat(by_variables, axis=1)
        group_data.columns = listify(by)

        # For sparse data, we need to set up a 1D grouper
        if isinstance(var, SimpleVariable):
            # Create single grouping variable by combining all 'by' variables
            if group_data.shape[1] == 1:
                group_labels = group_data.iloc[:, 0].values
            else:
                group_rows = group_data.astype(str).values.tolist()
                group_labels = ['_'.join(r) for r in group_rows]

            result = var.split(group_labels)

        # For dense data, use patsy to create design matrix, then multiply
        # it by target variable
        else:
            group_data = group_data.astype(str)
            formula = '0+' + '*'.join(listify(by))
            dm = dmatrix(formula, data=group_data, return_type='dataframe')
            result = var.split(dm)

        if drop_orig:
            self.collection.variables.pop(var.name)

        return result
Example #10
def merge_collections(collections, sampling_rate='highest', output_level=None):
    """Merge two or more collections at the same level of analysis.

    Parameters
    ----------
    collections : list
        List of Collections to merge.
    sampling_rate : int or str
        Sampling rate to use if it becomes necessary
        to resample DenseRunVariables. Either an integer or 'highest' (see
        merge_variables docstring for further explanation).
    output_level : str, optional
        Assign a new level (e.g., 'run', 'subject', etc.) to the merged
        collection. If None, the current level is retained.

    Returns
    -------
    BIDSVariableCollection or BIDSRunVariableCollection
        Result type depends on the type of the input collections.
    """
    collections = listify(collections)
    if len(collections) == 1:
        return collections[0]

    levels = set([c.level for c in collections])
    if len(levels) > 1:
        raise ValueError("At the moment, it's only possible to merge "
                         "Collections at the same level of analysis. You "
                         "passed collections at levels: %s." % levels)

    variables = list(chain(*[c.variables.values() for c in collections]))
    cls = collections[0].__class__

    variables = cls.merge_variables(variables, sampling_rate=sampling_rate)

    if isinstance(collections[0], BIDSRunVariableCollection):
        # 'auto' was renamed to 'highest' circa 0.10, but check for both
        if sampling_rate in {'auto', 'highest'}:
            rates = [
                var.sampling_rate for var in variables
                if isinstance(var, DenseRunVariable)
            ]

            sampling_rate = rates[0] if rates else None

        return cls(variables, sampling_rate)

    # For non-run collections, we may need to set a different output level
    coll = cls(variables)
    if output_level is not None:
        coll.level = output_level
    return coll
Example #11
    def _transform(self, var, by):

        if not isinstance(var, SimpleVariable):
            self._densify_variables()

        # Set up all the splitting variables as a DF. Note that variables in
        # 'by' can be either regular variables, or entities in the index--so
        # we need to check both places.
        all_variables = self._variables
        by_variables = [all_variables[v].values if v in all_variables
                        else var.index[v].reset_index(drop=True)
                        for v in listify(by)]
        group_data = pd.concat(by_variables, axis=1, sort=True)
        group_data.columns = listify(by)

        # Use patsy to create splitting design matrix
        group_data = group_data.astype(str)
        formula = '0+' + ':'.join(listify(by))
        dm = dmatrix(formula, data=group_data, return_type='dataframe')
        dm.columns = [col.replace(':', '.') for col in dm.columns]

        return var.split(dm)
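A rough sketch of the patsy call used above on a toy DataFrame (column names are hypothetical); a '0+' formula with an interaction term yields one indicator column per level combination:

import pandas as pd
from patsy import dmatrix

group_data = pd.DataFrame({'condition': ['a', 'a', 'b', 'b'],
                           'session': ['1', '2', '1', '2']}).astype(str)
formula = '0+' + ':'.join(group_data.columns)        # '0+condition:session'
dm = dmatrix(formula, data=group_data, return_type='dataframe')
dm.columns = [col.replace(':', '.') for col in dm.columns]
# dm now holds one 0/1 indicator column per condition/session combination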
Example #12
File: munge.py Project: INCF/pybids
    def _transform(self, var, by, drop_orig=True):

        if not isinstance(var, SimpleVariable):
            self._densify_variables()

        # Set up all the splitting variables as a DF. Note that variables in
        # 'by' can be either regular variables, or entities in the index--so
        # we need to check both places.
        all_variables = self._variables
        by_variables = [all_variables[v].values if v in all_variables
                        else var.index[v].reset_index(drop=True)
                        for v in listify(by)]
        group_data = pd.concat(by_variables, axis=1, sort=True)
        group_data.columns = listify(by)

        # For sparse data, we need to set up a 1D grouper
        if isinstance(var, SimpleVariable):
            # Create single grouping variable by combining all 'by' variables
            if group_data.shape[1] == 1:
                group_labels = group_data.iloc[:, 0].values
            else:
                group_rows = group_data.astype(str).values.tolist()
                group_labels = ['_'.join(r) for r in group_rows]

            result = var.split(group_labels)

        # For dense data, use patsy to create design matrix, then multiply
        # it by target variable
        else:
            group_data = group_data.astype(str)
            formula = '0+' + '*'.join(listify(by))
            dm = dmatrix(formula, data=group_data, return_type='dataframe')
            result = var.split(dm)

        if drop_orig:
            self.collection.variables.pop(var.name)

        return result
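The sparse branch above collapses several 'by' columns into a single 1-D grouper; a minimal pandas sketch of that step on toy data (not pybids objects):

import pandas as pd

group_data = pd.DataFrame({'condition': ['a', 'a', 'b'], 'run': [1, 2, 1]})
if group_data.shape[1] == 1:
    group_labels = group_data.iloc[:, 0].values
else:
    group_rows = group_data.astype(str).values.tolist()
    group_labels = ['_'.join(r) for r in group_rows]
# group_labels == ['a_1', 'a_2', 'b_1']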
Example #13
    def _clone_columns(self):
        ''' Deep copy all columns the transformation touches. This prevents us
        from unnecessarily overwriting existing columns. '''

        # Always clone the target columns
        self._columns = {c: self.collection[c].clone() for c in self.cols}

        if not self._columns_used:
            return

        # Loop over argument names and clone all column names in each one
        for var in self._columns_used:
            for c in listify(self.kwargs.get(var, [])):
                self._columns[c] = deepcopy(self.collection[c])
Example #14
    def _transform(self, col, other):

        other = listify(other)

        # Set up X matrix and slice into it based on target column indices
        X = np.array([self._columns[c].values.values.squeeze()
                      for c in other]).T
        X = X[col.index, :]
        assert len(X) == len(col)
        y = col.values
        _aX = np.c_[np.ones(len(y)), X]
        coefs, resids, rank, s = np.linalg.lstsq(_aX, y, rcond=None)
        result = pd.DataFrame(y - X.dot(coefs[1:]), index=col.index)
        return result
Example #15
    def _transform(self, var, other):

        other = listify(other)

        # Set up X matrix and slice into it based on target variable indices
        X = np.array(
            [self._variables[c].values.values.squeeze() for c in other]).T
        X = X[var.index, :]
        assert len(X) == len(var)
        y = var.values
        _aX = np.c_[np.ones(len(y)), X]
        coefs, resids, rank, s = np.linalg.lstsq(_aX, y, rcond=None)
        result = pd.DataFrame(y - X.dot(coefs[1:]), index=var.index)
        return result
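Examples #14 and #15 both residualize a target column against a set of regressors; a self-contained numpy sketch of that operation on random toy data (no pybids objects):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))
y = 2.0 + X @ np.array([1.5, -0.5]) + rng.normal(scale=0.1, size=100)

_aX = np.c_[np.ones(len(y)), X]                     # prepend an intercept
coefs, resids, rank, s = np.linalg.lstsq(_aX, y, rcond=None)
residualized = y - X.dot(coefs[1:])                 # remove the fitted X effect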
Example #16
    def fit(self, spatialimage):
        r"""
        Generate the interpolation matrix (and the VSM with it).

        Implements Eq. :math:`\eqref{eq:1}`, interpolating :math:`f(\mathbf{s})`
        for all voxels in the target-image's extent.

        Returns
        -------
        updated : :obj:`bool`
            ``True`` if the internal field representation was fit,
            ``False`` if cache was valid and will be reused.

        """
        # Calculate the physical coordinates of target grid
        if isinstance(spatialimage, (str, bytes, Path)):
            spatialimage = nb.load(spatialimage)

        if self.shifts is not None:
            newaff = spatialimage.affine
            newshape = spatialimage.shape

            if np.all(newshape == self.shifts.shape) and np.allclose(
                    newaff, self.shifts.affine):
                return False

        weights = []
        coeffs = []

        # Generate tensor-product B-Spline weights
        for level in listify(self.coeffs):
            self.xfm.reference = spatialimage
            moved_cs = level.__class__(level.dataobj,
                                       self.xfm.matrix @ level.affine,
                                       level.header)
            wmat = grid_bspline_weights(spatialimage, moved_cs)
            weights.append(wmat)
            coeffs.append(level.get_fdata(dtype="float32").reshape(-1))

        # Interpolate the VSM (voxel-shift map)
        vsm = np.zeros(spatialimage.shape[:3], dtype="float32")
        vsm = (
            np.squeeze(np.hstack(coeffs).T) @ sparse_vstack(weights)).reshape(
                vsm.shape)

        # Cache
        self.shifts = nb.Nifti1Image(vsm, spatialimage.affine, None)
        self.shifts.header.set_intent("estimate", name="Voxel shift")
        self.shifts.header.set_xyzt_units("mm")
        return True
Example #17
def load_variables(layout, levels=None, merge=False, target=None, **kwargs):
    ''' A convenience wrapper for one or more load_*_variables() calls.
    Args:
        layout (BIDSLayout): BIDSLayout containing variable files.
        levels (str, list): Level or list of levels to load variables for.
            Valid values are 'time', 'run', 'session', and 'subject'.
        merge (bool): If True, the requested levels are merged into a single
            BIDSVariableCollection before returning. Ignored if only one
            level is requested.
        target (str): If merge=True, target indicates the level that defines
            the granularity of the result. See merge_collections for further
            explanation.
        kwargs: Optional keyword arguments to pass onto the individual
            load_*_variables() calls.
    Returns:
        If only a single level is passed, or merge is True, a single
            BIDSVariableCollection. If a list of levels is passed and merge is
            False, a dict is returned, with level names in keys and
            BIDSVariableCollections in values.
    '''

    ALL_LEVELS = ['time', 'run', 'session', 'subject']

    if levels is None:
        levels = ALL_LEVELS

    _levels = listify(levels)

    func_map = {
        'time': load_event_variables,
        'run': load_run_variables,
        'session': load_session_variables,
        'subject': load_subject_variables
    }

    bad_levels = set(_levels) - set(ALL_LEVELS)
    if bad_levels:
        raise ValueError("Invalid level names: %s" % bad_levels)

    collections = [func_map[l](layout, **kwargs) for l in _levels]

    if len(collections) == 1:
        return collections[0]

    # if merge:
    #     return merge_collections(collections, target=target)

    return dict(zip(_levels, collections))
Example #18
    def _densify_variables(self):

        variables = []

        for var in self._densify:

            if var == 'variables':
                variables.extend(self.variables)
            else:
                variables.extend(listify(self.kwargs.get(var, [])))

        for v in variables:
            var = self._variables[v]
            if isinstance(var, SparseRunVariable):
                sr = self.collection.sampling_rate
                self._variables[v] = var.to_dense(sr)
Example #19
        def _replace_arg_values(values):
            is_iter = isinstance(values, (list, tuple))
            values = listify(values)
            result = []
            # Only try to match strings containing a relevant special character
            for v in values:
                if isinstance(v, str) and re.search(r'[\*\?\[\]]', v):
                    result.append(self.collection.match_variables(v))
                else:
                    result.append([v])

            result = list(itertools.chain(*result))
            # Don't return a list unless we have to
            if is_iter or len(result) > 1:
                return result
            return result[0]
Example #20
    def _densify_columns(self):

        from bids.analysis.variables import SparseEventColumn

        cols = []

        for var in self._densify:

            if var == 'cols':
                cols.extend(self.cols)
            else:
                cols.extend(listify(self.kwargs.get(var, [])))

        for c in cols:
            col = self._columns[c]
            if isinstance(col, SparseEventColumn):
                self._columns[c] = col.to_dense()
Example #21
def merge_collections(collections, force_dense=False, sampling_rate='auto'):
    """Merge two or more collections at the same level of analysis.

    Parameters
    ----------
    collections : list
        List of Collections to merge.
    sampling_rate : int or str
        Sampling rate to use if it becomes necessary
        to resample DenseRunVariables. Either an integer or 'auto' (see
        merge_variables docstring for further explanation).

    Returns
    -------
    BIDSVariableCollection or BIDSRunVariableCollection
        Result type depends on the type of the input collections.
    """
    if len(listify(collections)) == 1:
        return collections

    levels = set([c.level for c in collections])
    if len(levels) > 1:
        raise ValueError("At the moment, it's only possible to merge "
                         "Collections at the same level of analysis. You "
                         "passed collections at levels: %s." % levels)

    variables = list(chain(*[c.variables.values() for c in collections]))
    cls = collections[0].__class__

    variables = cls.merge_variables(variables, sampling_rate=sampling_rate)

    if isinstance(collections[0], BIDSRunVariableCollection):
        if sampling_rate == 'auto':
            rates = [
                var.sampling_rate for var in variables
                if isinstance(var, DenseRunVariable)
            ]

            sampling_rate = rates[0] if rates else None

        return cls(variables, sampling_rate)

    return cls(variables)
Example #22
    def _check_categorical_columns(self):
        ''' Convert categorical columns to dummy-coded indicators. '''

        # Collect column names to pass through
        pass_thru = []
        if self._allow_categorical is not None:
            for arg in self._allow_categorical:
                keys = self.cols if arg == 'cols' else self.kwargs.get(arg, [])
                pass_thru.extend(listify(keys))
        pass_thru = list(set(pass_thru))

        for name, col in self._columns.items():
            if name not in pass_thru:
                if col.values.values.dtype.kind not in 'bifc':
                    msg = ("The %s transformation does not allow column '%s' "
                           "to be categorical. Eithe pass a different column, "
                           "or explicitly convert to a set of binary "
                           "indicators via the 'factor' transformation.")
                    raise ValueError(msg % (self.__class__.__name__, name))
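The dtype check above relies on numpy's kind codes ('b', 'i', 'f', 'c' for boolean, integer, float, complex); a small pandas sketch of the check and of the dummy-coding the error message suggests, with pd.get_dummies standing in for the 'factor' transformation (toy data):

import pandas as pd

col = pd.Series(['face', 'house', 'face'])
if col.dtype.kind not in 'bifc':                    # not numeric/boolean
    indicators = pd.get_dummies(col, prefix='trial_type')
# indicators has binary columns trial_type_face and trial_type_house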
Example #23
    def _clone_variables(self):
        ''' Deep copy all variables the transformation touches. This prevents us
        from unnecessarily overwriting existing variables. '''

        # Always clone the target variables
        self._variables = {v: self.collection[v].clone()
                           for v in self.variables}

        if not self._variables_used:
            return

        # Loop over argument names and clone all variable names in each one
        for var in self._variables_used:
            for v in listify(self.kwargs.get(var, [])):
                # Kludge: we need to allow entity variables to be passed as
                # names even though they don't exist as separate variables
                if (v not in self.collection.variables and
                        v in ['task', 'run', 'session', 'subject']):
                    continue
                self._variables[v] = deepcopy(self.collection[v])
Example #24
def merge_collections(collections, force_dense=False, sampling_rate='auto'):
    ''' Merge two or more collections at the same level of analysis.

    Args:
        collections (list): List of Collections to merge.
        sampling_rate (int, str): Sampling rate to use if it becomes necessary
            to resample DenseRunVariables. Either an integer or 'auto' (see
            merge_variables docstring for further explanation).

    Returns:
        A BIDSVariableCollection or BIDSRunVariableCollection, depending
        on the type of the input collections.
    '''
    if len(listify(collections)) == 1:
        return collections

    levels = set([c.level for c in collections])
    if len(levels) > 1:
        raise ValueError("At the moment, it's only possible to merge "
                         "Collections at the same level of analysis. You "
                         "passed collections at levels: %s." % levels)

    variables = list(chain(*[c.variables.values() for c in collections]))
    cls = collections[0].__class__

    variables = cls.merge_variables(variables, sampling_rate=sampling_rate)

    if isinstance(collections[0], BIDSRunVariableCollection):
        if sampling_rate == 'auto':
            rates = [var.sampling_rate for var in variables
                     if isinstance(var, DenseRunVariable)]

            sampling_rate = rates[0] if rates else None

        return cls(variables, sampling_rate)

    return cls(variables)
Example #25
def _load_tsv_variables(layout, type_, dataset=None, columns=None,
                        prepend_type=False, **selectors):
    ''' Reads variables from scans.tsv, sessions.tsv, and participants.tsv.

    Args:
        layout (BIDSLayout): The BIDSLayout to use.
        type_ (str): The type of file to read from. Must be one of 'scans',
            'sessions', or 'participants'.
        dataset (NodeIndex): A BIDS NodeIndex container. If None, a new one is
            initialized.
        columns (list): Optional list of names specifying which columns in the
            files to return. If None, all columns are returned.
        prepend_type (bool): If True, variable names are prepended with the
            type name (e.g., 'age' becomes 'participants.age').
        selectors (dict): Optional keyword arguments passed onto the
            BIDSLayout instance's get() method; can be used to constrain
            which data are loaded.

    Returns: A NodeIndex instance.
    '''

    # Sanitize the selectors: only keep entities at current level or above
    remap = {'scans': 'run', 'sessions': 'session', 'participants': 'subject'}
    level = remap[type_]
    valid_entities = BASE_ENTITIES[:BASE_ENTITIES.index(level)]
    layout_kwargs = {k: v for k, v in selectors.items() if k in valid_entities}

    if dataset is None:
        dataset = NodeIndex()

    files = layout.get(extensions='.tsv', return_type='file', type=type_,
                       **layout_kwargs)

    for f in files:

        f = layout.files[f]
        _data = pd.read_table(f.path, sep='\t')

        # Entities can be defined either within the first column of the .tsv
        # file (for entities that vary by row), or from the full file path
        # (for entities constant over all rows in the file). We extract both
        # and store them in the main DataFrame alongside other variables (as
        # they'll be extracted when the Column is initialized anyway).
        for ent_name, ent_val in f.entities.items():
            if ent_name in BASE_ENTITIES:
                _data[ent_name] = ent_val

        # Handling is a bit more convoluted for scans.tsv, because the first
        # column contains the run filename, which we also need to parse.
        if type_ == 'scans':
            image = _data['filename']
            _data = _data.drop('filename', axis=1)
            dn = f.dirname
            paths = [join(dn, p) for p in image.values]
            ent_recs = [layout.files[p].entities for p in paths
                        if p in layout.files]
            ent_cols = pd.DataFrame.from_records(ent_recs)
            _data = pd.concat([_data, ent_cols], axis=1)
            # It's possible to end up with duplicate entity columns this way
            _data = _data.T.drop_duplicates().T

        # The BIDS spec requires ID columns to be named 'session_id', 'run_id',
        # etc., and IDs begin with entity prefixes (e.g., 'sub-01'). To ensure
        # consistent internal handling, we strip these suffixes and prefixes.
        elif type_ == 'sessions':
            _data = _data.rename(columns={'session_id': 'session'})
            _data['session'] = _data['session'].str.replace('ses-', '')
        elif type_ == 'participants':
            _data = _data.rename(columns={'participant_id': 'subject'})
            _data['subject'] = _data['subject'].str.replace('sub-', '')

        # Filter rows on all selectors
        comm_cols = list(set(_data.columns) & set(selectors.keys()))
        for col in comm_cols:
            vals = listify(selectors.get(col))
            _data = _data.query('%s in @vals' % col)

        level = {'scans': 'session', 'sessions': 'subject',
                 'participants': 'dataset'}[type_]
        node = dataset.get_or_create_node(level, f.entities)

        ent_cols = list(set(ALL_ENTITIES) & set(_data.columns))
        amp_cols = list(set(_data.columns) - set(ent_cols))

        if columns is not None:
            amp_cols = list(set(amp_cols) & set(columns))

        for col_name in amp_cols:

            # Rename columns: values must be in 'amplitude'
            df = _data.loc[:, [col_name] + ent_cols]
            df.columns = ['amplitude'] + ent_cols

            if prepend_type:
                col_name = '%s.%s' % (type_, col_name)

            node.add_variable(SimpleVariable(col_name, df, type_))

    return dataset
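The ID-normalizing step above (renaming *_id columns and stripping 'ses-'/'sub-' prefixes) is plain pandas; a toy sketch of the participants branch:

import pandas as pd

_data = pd.DataFrame({'participant_id': ['sub-01', 'sub-02'], 'age': [24, 31]})
_data = _data.rename(columns={'participant_id': 'subject'})
_data['subject'] = _data['subject'].str.replace('sub-', '')
# _data['subject'] is now ['01', '02']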
Example #26
def load_variables(layout, types=None, levels=None, skip_empty=True, **kwargs):
    ''' A convenience wrapper for one or more load_*_variables() calls.

    Args:
        layout (BIDSLayout): BIDSLayout containing variable files.
        types (str, list): Types of variables to retrieve. All valid values
            reflect the filename stipulated in the BIDS spec for each kind of
            variable. Valid values include: 'events', 'physio', 'stim',
            'scans', 'participants', 'sessions', and 'confounds'.
        levels (str, list): Optional level(s) of variables to load. Valid
            values are 'run', 'session', 'subject', or 'dataset'. This is
            simply a shorthand way to specify types--e.g., 'run' will be
            converted to types=['events', 'physio', 'stim', 'confounds'].
        skip_empty (bool): Whether or not to skip empty Variables (i.e.,
            where there are no rows/records in a file after applying any
            filtering operations like dropping NaNs).
        kwargs: Optional keyword arguments to pass onto the individual
            load_*_variables() calls.

    Returns:
        A NodeIndex instance.

    Example:
        >>> load_variables(layout, ['events', 'physio'], subject='01')
        # returns all variables stored in _events.tsv and _physio.tsv.gz files
        # for runs that belong to subject with id '01'.
    '''

    TYPES = ['events', 'physio', 'stim', 'scans', 'participants', 'sessions',
             'confounds']

    types = listify(types)

    if types is None:
        if levels is not None:
            types = []
            lev_map = {
                'run': ['events', 'physio', 'stim', 'confounds'],
                'session': ['scans'],
                'subject': ['sessions'],
                'dataset': ['participants']
            }
            [types.extend(lev_map[l]) for l in listify(levels)]
        else:
            types = TYPES

    bad_types = set(types) - set(TYPES)
    if bad_types:
        raise ValueError("Invalid variable types: %s" % bad_types)

    dataset = NodeIndex()

    run_types = list({'events', 'physio', 'stim', 'confounds'} - set(types))
    type_flags = {t: False for t in run_types}
    if len(type_flags) < 4:
        _kwargs = kwargs.copy()
        _kwargs.update(type_flags)
        dataset = _load_time_variables(layout, dataset, **_kwargs)

    for t in ({'scans', 'sessions', 'participants'} & set(types)):
        dataset = _load_tsv_variables(layout, t, dataset, **kwargs)

    return dataset
Example #27
def load_variables(layout,
                   types=None,
                   levels=None,
                   skip_empty=True,
                   dataset=None,
                   scope='all',
                   **kwargs):
    """A convenience wrapper for one or more load_*_variables() calls.

    Parameters
    ----------
    layout : :obj:`bids.layout.BIDSLayout`
        BIDSLayout containing variable files.
    types : str or list
        Types of variables to retrieve. All valid values
        reflect the filename stipulated in the BIDS spec for each kind of
        variable. Valid values include: 'events', 'physio', 'stim',
        'scans', 'participants', 'sessions', and 'regressors'.
    levels : str or list
        Optional level(s) of variables to load. Valid
        values are 'run', 'session', 'subject', or 'dataset'. This is
        simply a shorthand way to specify types--e.g., 'run' will be
        converted to types=['events', 'physio', 'stim', 'regressors'].
    skip_empty : bool
        Whether or not to skip empty Variables (i.e.,
        where there are no rows/records in a file after applying any
        filtering operations like dropping NaNs).
    dataset : NodeIndex
        An existing NodeIndex container to store the
        loaded data in. Can be used to iteratively construct a dataset
        that contains otherwise heterogeneous sets of variables. If None,
        a new NodeIndex is used.
    scope : str or list
        The scope of the space to search for variables. See
        docstring for BIDSLayout for details and valid predefined values.
    kwargs : dict
        Optional keyword arguments to pass onto the individual
        load_*_variables() calls.

    Returns
    -------
    A NodeIndex instance.

    Examples
    --------
    >>> load_variables(layout, ['events', 'physio'], subject='01')  # doctest: +SKIP
    # returns all variables stored in _events.tsv and _physio.tsv.gz files
    # for runs that belong to subject with id '01'.
    """

    TYPES = [
        'events', 'physio', 'stim', 'scans', 'participants', 'sessions',
        'regressors'
    ]

    types = listify(types)

    if types is None:
        if levels is not None:
            types = []
            lev_map = {
                'run': ['events', 'physio', 'stim', 'regressors'],
                'session': ['scans'],
                'subject': ['sessions'],
                'dataset': ['participants']
            }
            [types.extend(lev_map[l.lower()]) for l in listify(levels)]
        else:
            types = TYPES

    bad_types = set(types) - set(TYPES)
    if bad_types:
        raise ValueError("Invalid variable types: %s" % bad_types)

    dataset = dataset or NodeIndex()

    run_types = list({'events', 'physio', 'stim', 'regressors'} - set(types))
    type_flags = {t: False for t in run_types}
    if len(type_flags) < 4:
        _kwargs = kwargs.copy()
        _kwargs.update(type_flags)
        dataset = _load_time_variables(layout, dataset, scope=scope, **_kwargs)

    for t in ({'scans', 'sessions', 'participants'} & set(types)):
        kwargs.pop('suffix', None)  # suffix is always one of values above
        dataset = _load_tsv_variables(layout,
                                      t,
                                      dataset,
                                      scope=scope,
                                      **kwargs)

    return dataset
Example #28
class FieldmapEstimation:
    """
    Represent fieldmap estimation strategies.

    This class provides a consistent interface to all types of fieldmap estimation
    strategies.
    The actual type of method for estimation is inferred from the ``sources`` input,
    and collects all the available metadata.

    """

    sources = attr.ib(
        default=None,
        converter=lambda v: [
            FieldmapFile(f) if not isinstance(f, FieldmapFile) else f
            for f in listify(v)
        ],
        repr=lambda v: f"<{len(v)} files>",
    )
    """File path or list of paths indicating the source data to estimate a fieldmap."""

    method = attr.ib(init=False,
                     default=EstimatorType.UNKNOWN,
                     on_setattr=_type_setter)
    """Flag indicating the estimator type inferred from the input sources."""

    bids_id = attr.ib(default=None,
                      kw_only=True,
                      type=str,
                      on_setattr=_id_setter)
    """The unique ``B0FieldIdentifier`` field of this fieldmap."""

    _wf = attr.ib(init=False, default=None, repr=False)
    """Internal pointer to a workflow."""
    def __attrs_post_init__(self):
        """Determine the inteded fieldmap estimation type and check for data completeness."""
        suffix_list = [f.suffix for f in self.sources]
        suffix_set = set(suffix_list)

        # Fieldmap option 1: actual field-mapping sequences
        fmap_types = suffix_set.intersection(
            ("fieldmap", "phasediff", "phase1", "phase2"))
        if len(fmap_types) > 1 and fmap_types - set(("phase1", "phase2")):
            raise TypeError(
                f"Incompatible suffices found: <{','.join(fmap_types)}>.")

        if fmap_types:
            sources = sorted(
                str(f.path) for f in self.sources
                if f.suffix in ("fieldmap", "phasediff", "phase1", "phase2"))

            # Automagically add the corresponding phase2 file if missing as argument
            missing_phases = ("phase1" not in fmap_types, "phase2"
                              not in fmap_types)
            if sum(missing_phases) == 1:
                mis_ph = "phase1" if missing_phases[0] else "phase2"
                hit_ph = "phase2" if missing_phases[0] else "phase1"
                new_source = sources[0].replace(hit_ph, mis_ph)
                self.sources.append(FieldmapFile(new_source))
                sources.insert(int(missing_phases[1]), new_source)

            # Set method, this cannot be undone
            self.method = MODALITIES[fmap_types.pop()]

            # Determine the name of the corresponding (first) magnitude file(s)
            magnitude = f"magnitude{'' if self.method == EstimatorType.MAPPED else '1'}"
            if magnitude not in suffix_set:
                try:
                    self.sources.append(
                        FieldmapFile(sources[0].replace(
                            "fieldmap",
                            "magnitude").replace("diff", "1").replace(
                                "phase", "magnitude")))
                except Exception:
                    raise ValueError(
                        "A fieldmap or phase-difference estimation type was found, "
                        f"but an anatomical reference ({magnitude} file) is missing."
                    )

            # Check presence and try to find (if necessary) the second magnitude file
            if (self.method == EstimatorType.PHASEDIFF
                    and "magnitude2" not in suffix_set):
                try:
                    self.sources.append(
                        FieldmapFile(sources[-1].replace("diff", "2").replace(
                            "phase", "magnitude")))
                except Exception:
                    if "phase2" in suffix_set:
                        raise ValueError(
                            "A phase-difference estimation (phase1/2) type was found, "
                            "but an anatomical reference (magnitude2 file) is missing."
                        )

        # Fieldmap option 2: PEPOLAR (and fieldmap-less or ANAT)
        # IMPORTANT NOTE: fieldmap-less approaches can be considered PEPOLAR with RO = 0.0s
        pepolar_types = suffix_set.intersection(
            ("bold", "dwi", "epi", "sbref"))
        anat_types = suffix_set.intersection(("T1w", "T2w"))
        _pepolar_estimation = (len([
            f for f in suffix_list if f in ("bold", "dwi", "epi", "sbref")
        ]) > 1)

        if _pepolar_estimation and not anat_types:
            self.method = MODALITIES[pepolar_types.pop()]
            _pe = set(f.metadata["PhaseEncodingDirection"]
                      for f in self.sources)
            if len(_pe) == 1:
                raise ValueError(
                    f"Only one phase-encoding direction <{_pe.pop()}> found across sources."
                )
        elif anat_types:
            self.method = MODALITIES[anat_types.pop()]

            if not pepolar_types:
                raise ValueError(
                    "Only anatomical sources were found, cannot estimate fieldmap."
                )

        if self.method == EstimatorType.UNKNOWN:
            # No method has been identified -> fail.
            raise ValueError("Insufficient sources to estimate a fieldmap.")

        intents_meta = set(
            el for f in self.sources
            for el in listify(f.metadata.get("IntendedFor") or []))

        # Register this estimation method
        if not self.bids_id:
            # If not manually set, try to get it from BIDS metadata
            bids_ids = set([
                f.metadata.get("B0FieldIdentifier") for f in self.sources
                if f.metadata.get("B0FieldIdentifier")
            ])
            if len(bids_ids) > 1:
                raise ValueError(
                    f"Multiple ``B0FieldIdentifier`` set: <{', '.join(bids_ids)}>"
                )
            elif bids_ids:
                object.__setattr__(self, "bids_id", bids_ids.pop())
            else:
                bids_id = _estimators.add(self.paths())
                object.__setattr__(self, "bids_id", bids_id)
                for intent_file in intents_meta:
                    _intents[intent_file].add(bids_id)
                return

        _estimators[self.bids_id] = self.paths()
        for intent_file in intents_meta:
            _intents[intent_file].add(self.bids_id)

    def paths(self):
        """Return a tuple of paths that are sorted."""
        return tuple(sorted(str(f.path) for f in self.sources))

    def get_workflow(self, **kwargs):
        """Build the estimation workflow corresponding to this instance."""
        if self._wf is not None:
            return self._wf

        # Override workflow name
        kwargs["name"] = f"wf_{self.bids_id}"

        if self.method in (EstimatorType.MAPPED, EstimatorType.PHASEDIFF):
            from .workflows.fit.fieldmap import init_fmap_wf

            kwargs["mode"] = str(self.method).rpartition(".")[-1].lower()
            self._wf = init_fmap_wf(**kwargs)
            self._wf.inputs.inputnode.magnitude = [
                str(f.path) for f in self.sources
                if f.suffix.startswith("magnitude")
            ]
            self._wf.inputs.inputnode.fieldmap = [
                (str(f.path), f.metadata) for f in self.sources
                if f.suffix in ("fieldmap", "phasediff", "phase2", "phase1")
            ]
        elif self.method == EstimatorType.PEPOLAR:
            from .workflows.fit.pepolar import init_topup_wf

            self._wf = init_topup_wf(**kwargs)
        elif self.method == EstimatorType.ANAT:
            from .workflows.fit.syn import init_syn_sdc_wf

            self._wf = init_syn_sdc_wf(**kwargs)

        return self._wf
Example #29
    def _align_variables(self, variables):
        """Checks whether the specified variables have aligned indexes. This
        implies either that all variables are dense, or that all variables are
        sparse and have exactly the same onsets and durations. If variables are
        not aligned and force = True, all variables will be forced to dense
        format in order to ensure alignment.
        """

        if self._aligned_required is None or self._aligned_required == 'none':
            return

        def _align(variables):
            # If any variable is dense, all variables must be dense
            sparse = [c for c in variables if isinstance(c, SparseRunVariable)]
            if len(sparse) < len(variables):
                if sparse:
                    msg = ("Found a mix of dense and sparse variables. May "
                           "cause problems for some transformations.")
                    warnings.warn(msg)
            # If all are sparse, durations, onsets, and index must match
            # perfectly for all
            else:

                def get_col_data(col):
                    return np.c_[col.values.index, col.duration, col.onset]

                def compare_variables(a, b):
                    return len(a) == len(b) and np.allclose(a, b)

                # Compare 1st col with each of the others
                fc = get_col_data(variables[0])
                if not all([
                        compare_variables(fc, get_col_data(c))
                        for c in variables[1:]
                ]):
                    if self._aligned_required == 'force_dense':
                        msg = ("Forcing all sparse variables to dense in "
                               "order to ensure proper alignment.")
                        sr = self.collection.sampling_rate
                        variables = [c.to_dense(sr) for c in variables]
                        warnings.warn(msg)
                    else:
                        raise ValueError(
                            "Misaligned sparse variables found. "
                            "To force variables into alignment by densifying, "
                            "set dense=True in the Transformation arguments.")

        _aligned_variables = True if not self._aligned_variables \
            else self._aligned_variables
        _aligned_variables = [
            listify(self.kwargs[v]) for v in listify(_aligned_variables)
            if v in self.kwargs
        ]
        _aligned_variables = list(itertools.chain(*_aligned_variables))
        _aligned_variables = [
            self.collection[c] for c in _aligned_variables if c
        ]

        if _aligned_variables and self._loopable:
            for c in variables:
                # TODO: should clone all variables in align_variables before
                # alignment to prevent conversion to dense in any given
                # iteration having side effects. This could be an issue if,
                # e.g., some vars in 'variables' are dense and some are sparse.
                _align([c] + _aligned_variables)
        else:
            _align(listify(variables) + _aligned_variables)
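The alignment test above compares (index, duration, onset) triplets across sparse variables with np.allclose; a stripped-down numpy version of that comparison on made-up onset/duration arrays:

import numpy as np

def get_col_data(index, duration, onset):
    return np.c_[index, duration, onset]

a = get_col_data([0, 1, 2], [2.0, 2.0, 2.0], [0.0, 4.0, 8.0])
b = get_col_data([0, 1, 2], [2.0, 2.0, 2.0], [0.0, 4.0, 8.0])

aligned = len(a) == len(b) and np.allclose(a, b)    # True -> no densify needed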
Example #30
    def _run_interface(self, runtime):
        # Ready the output folder
        base_directory = runtime.cwd
        if isdefined(self.inputs.base_directory):
            base_directory = self.inputs.base_directory
        base_directory = Path(base_directory).absolute()
        out_path = base_directory / self.out_path_base
        out_path.mkdir(exist_ok=True, parents=True)

        # Ensure we have a list
        in_file = listify(self.inputs.in_file)

        # Read in the dictionary of metadata
        if isdefined(self.inputs.meta_dict):
            meta = self.inputs.meta_dict
            # inputs passed in construction take priority
            meta.update(self._metadata)
            self._metadata = meta

        # Initialize entities with those from the source file.
        in_entities = [
            parse_file_entities(str(relative_to_root(source_file)))
            for source_file in self.inputs.source_file
        ]
        out_entities = {
            k: v
            for k, v in in_entities[0].items() if all(
                ent.get(k) == v for ent in in_entities[1:])
        }
        for drop_entity in listify(self.inputs.dismiss_entities or []):
            out_entities.pop(drop_entity, None)

        # Override extension with that of the input file(s)
        out_entities["extension"] = [
            # _splitext does not accept .surf.gii (for instance)
            "".join(Path(orig_file).suffixes).lstrip(".")
            for orig_file in in_file
        ]

        compress = listify(self.inputs.compress) or [None]
        if len(compress) == 1:
            compress = compress * len(in_file)
        for i, ext in enumerate(out_entities["extension"]):
            if compress[i] is not None:
                ext = regz.sub("", ext)
                out_entities["extension"][
                    i] = f"{ext}.gz" if compress[i] else ext

        # Override entities with those set as inputs
        for key in self._allowed_entities:
            value = getattr(self.inputs, key)
            if value is not None and isdefined(value):
                out_entities[key] = value

        # Clean up native resolution with space
        if out_entities.get("resolution") == "native" and out_entities.get(
                "space"):
            out_entities.pop("resolution", None)

        if len(set(out_entities["extension"])) == 1:
            out_entities["extension"] = out_entities["extension"][0]

        # Insert custom (non-BIDS) entities from allowed_entities.
        custom_entities = set(out_entities.keys()) - set(BIDS_DERIV_ENTITIES)
        patterns = BIDS_DERIV_PATTERNS
        if custom_entities:
            # Example: f"{key}-{{{key}}}" -> "task-{task}"
            custom_pat = "_".join(f"{key}-{{{key}}}"
                                  for key in sorted(custom_entities))
            patterns = [
                pat.replace("_{suffix", "_".join(("", custom_pat, "{suffix")))
                for pat in patterns
            ]

        # Prepare SimpleInterface outputs object
        self._results["out_file"] = []
        self._results["compression"] = []
        self._results["fixed_hdr"] = [False] * len(in_file)

        dest_files = build_path(out_entities, path_patterns=patterns)
        if not dest_files:
            raise ValueError(
                f"Could not build path with entities {out_entities}.")

        # Make sure the interpolated values are embedded in a list, and check
        dest_files = listify(dest_files)
        if len(in_file) != len(dest_files):
            raise ValueError(f"Input files ({len(in_file)}) not matched "
                             f"by interpolated patterns ({len(dest_files)}).")

        for i, (orig_file, dest_file) in enumerate(zip(in_file, dest_files)):
            out_file = out_path / dest_file
            out_file.parent.mkdir(exist_ok=True, parents=True)
            self._results["out_file"].append(str(out_file))
            self._results["compression"].append(
                _copy_any(orig_file, str(out_file)))

            is_nifti = out_file.name.endswith(
                (".nii", ".nii.gz")) and not out_file.name.endswith(
                    (".dtseries.nii", ".dtseries.nii.gz"))
            data_dtype = self.inputs.data_dtype or DEFAULT_DTYPES[
                self.inputs.suffix]
            if is_nifti and any((self.inputs.check_hdr, data_dtype)):
                # Do not use mmap; if we need to access the data at all, it will be to
                # rewrite, risking a BusError
                nii = nb.load(out_file, mmap=False)

                if self.inputs.check_hdr:
                    hdr = nii.header
                    curr_units = tuple([
                        None if u == "unknown" else u
                        for u in hdr.get_xyzt_units()
                    ])
                    curr_codes = (int(hdr["qform_code"]),
                                  int(hdr["sform_code"]))

                    # Default to mm, use sec if data type is bold
                    units = (
                        curr_units[0] or "mm",
                        "sec" if out_entities["suffix"] == "bold" else None,
                    )
                    xcodes = (1, 1)  # Derivative in its original scanner space
                    if self.inputs.space:
                        xcodes = ((4, 4) if self.inputs.space
                                  in STANDARD_SPACES else (2, 2))

                    if curr_codes != xcodes or curr_units != units:
                        self._results["fixed_hdr"][i] = True
                        hdr.set_qform(nii.affine, xcodes[0])
                        hdr.set_sform(nii.affine, xcodes[1])
                        hdr.set_xyzt_units(*units)

                        # Rewrite file with new header
                        overwrite_header(nii, out_file)

                if data_dtype == "source":  # match source dtype
                    try:
                        data_dtype = nb.load(
                            self.inputs.source_file[0]).get_data_dtype()
                    except Exception:
                        LOGGER.warning(
                            f"Could not get data type of file {self.inputs.source_file[0]}"
                        )
                        data_dtype = None

                if data_dtype:
                    if self.inputs.check_hdr:
                        # load updated NIfTI
                        nii = nb.load(out_file, mmap=False)
                    data_dtype = np.dtype(data_dtype)
                    orig_dtype = nii.get_data_dtype()
                    if orig_dtype != data_dtype:
                        LOGGER.warning(
                            f"Changing {out_file} dtype from {orig_dtype} to {data_dtype}"
                        )
                        # coerce dataobj to new data dtype
                        if np.issubdtype(data_dtype, np.integer):
                            new_data = np.rint(nii.dataobj).astype(data_dtype)
                        else:
                            new_data = np.asanyarray(nii.dataobj,
                                                     dtype=data_dtype)
                        # and set header to match
                        nii.set_data_dtype(data_dtype)
                        nii = nii.__class__(new_data, nii.affine, nii.header)
                        nii.to_filename(out_file)

        if len(self._results["out_file"]) == 1:
            meta_fields = self.inputs.copyable_trait_names()
            self._metadata.update({
                k: getattr(self.inputs, k)
                for k in meta_fields if k not in self._static_traits
            })
            if self._metadata:
                out_file = Path(self._results["out_file"][0])
                # 1.3.x hack
                # For dtseries, we have been generating weird non-BIDS JSON files.
                # We can safely keep producing them to avoid breaking derivatives, but
                # only the existing keys should keep going into them.
                if out_file.name.endswith(".dtseries.nii"):
                    legacy_metadata = {}
                    for key in ("grayordinates", "space", "surface",
                                "surface_density", "volume"):
                        if key in self._metadata:
                            legacy_metadata[key] = self._metadata.pop(key)
                    if legacy_metadata:
                        sidecar = out_file.parent / f"{_splitext(str(out_file))[0]}.json"
                        sidecar.write_text(
                            dumps(legacy_metadata, sort_keys=True, indent=2))
                # Future behavior: everything from the first '.' onward is treated as the extension
                sidecar = out_file.parent / f"{out_file.name.split('.', 1)[0]}.json"
                sidecar.write_text(
                    dumps(self._metadata, sort_keys=True, indent=2))
                self._results["out_meta"] = str(sidecar)
        return runtime
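The header-fixing branch in the snippet above follows a simple pattern: compare the NIfTI xyzt units and q/sform codes against the expected values, and rewrite the header only when they disagree. Below is a minimal standalone sketch of that pattern with nibabel; fix_derivative_header is a hypothetical helper (it rewrites the whole image rather than using niworkflows' overwrite_header utility), and the expected codes and units simply mirror the logic above.

import nibabel as nb
import numpy as np


def fix_derivative_header(path, standard_space=False, is_bold=False):
    """Hypothetical helper mirroring the header checks above (sketch)."""
    nii = nb.load(path)
    hdr = nii.header.copy()

    # Expected xform codes: 4 (template) for standard spaces, 2 (aligned) otherwise
    xcodes = (4, 4) if standard_space else (2, 2)
    curr_codes = (int(hdr["qform_code"]), int(hdr["sform_code"]))

    # Expected units: spatial in mm; temporal in sec only for BOLD series
    units = ("mm", "sec" if is_bold else None)
    curr_units = tuple(None if u == "unknown" else u for u in hdr.get_xyzt_units())

    if curr_codes == xcodes and curr_units == units:
        return False  # nothing to fix

    hdr.set_qform(nii.affine, xcodes[0])
    hdr.set_sform(nii.affine, xcodes[1])
    hdr.set_xyzt_units(*units)
    # Simplification: write a new image instead of patching the header in place
    nii.__class__(np.asanyarray(nii.dataobj), nii.affine, hdr).to_filename(path)
    return True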
Example #31
    def __init__(self, fields=None, undef_fields=False, **inputs):
        super(ReadSidecarJSON, self).__init__(**inputs)
        self._fields = listify(fields or [])
        self._undef_fields = undef_fields
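The constructor above relies on bids.utils.listify so that fields may be passed as a single name or a list; a quick illustrative check of that normalization, mirroring the `fields or []` pattern used above:

from bids.utils import listify

# A single field name, a list of names, and None all normalize to a list
assert listify("RepetitionTime" or []) == ["RepetitionTime"]
assert listify(["RepetitionTime", "EchoTime"] or []) == ["RepetitionTime", "EchoTime"]
assert listify(None or []) == []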
Example #32
    def __attrs_post_init__(self):
        """Determine the inteded fieldmap estimation type and check for data completeness."""
        suffix_list = [f.suffix for f in self.sources]
        suffix_set = set(suffix_list)

        # Fieldmap option 1: actual field-mapping sequences
        fmap_types = suffix_set.intersection(
            ("fieldmap", "phasediff", "phase1", "phase2"))
        if len(fmap_types) > 1 and fmap_types - set(("phase1", "phase2")):
            raise TypeError(
                f"Incompatible suffices found: <{','.join(fmap_types)}>.")

        if fmap_types:
            sources = sorted(
                str(f.path) for f in self.sources
                if f.suffix in ("fieldmap", "phasediff", "phase1", "phase2"))

            # Automagically add the corresponding phase2 file if missing as argument
            missing_phases = ("phase1" not in fmap_types,
                              "phase2" not in fmap_types)
            if sum(missing_phases) == 1:
                mis_ph = "phase1" if missing_phases[0] else "phase2"
                hit_ph = "phase2" if missing_phases[0] else "phase1"
                new_source = sources[0].replace(hit_ph, mis_ph)
                self.sources.append(FieldmapFile(new_source))
                sources.insert(int(missing_phases[1]), new_source)

            # Set method, this cannot be undone
            self.method = MODALITIES[fmap_types.pop()]

            # Determine the name of the corresponding (first) magnitude file(s)
            magnitude = f"magnitude{'' if self.method == EstimatorType.MAPPED else '1'}"
            if magnitude not in suffix_set:
                try:
                    self.sources.append(
                        FieldmapFile(sources[0].replace(
                            "fieldmap",
                            "magnitude").replace("diff", "1").replace(
                                "phase", "magnitude")))
                except Exception:
                    raise ValueError(
                        "A fieldmap or phase-difference estimation type was found, "
                        f"but an anatomical reference ({magnitude} file) is missing."
                    )

            # Check presence and try to find (if necessary) the second magnitude file
            if (self.method == EstimatorType.PHASEDIFF
                    and "magnitude2" not in suffix_set):
                try:
                    self.sources.append(
                        FieldmapFile(sources[-1].replace("diff", "2").replace(
                            "phase", "magnitude")))
                except Exception:
                    if "phase2" in suffix_set:
                        raise ValueError(
                            "A phase-difference estimation (phase1/2) type was found, "
                            "but an anatomical reference (magnitude2 file) is missing."
                        )

        # Fieldmap option 2: PEPOLAR (and fieldmap-less or ANAT)
        # IMPORTANT NOTE: fieldmap-less approaches can be considered PEPOLAR with RO = 0.0s
        pepolar_types = suffix_set.intersection(
            ("bold", "dwi", "epi", "sbref"))
        anat_types = suffix_set.intersection(("T1w", "T2w"))
        _pepolar_estimation = (len([
            f for f in suffix_list if f in ("bold", "dwi", "epi", "sbref")
        ]) > 1)

        if _pepolar_estimation and not anat_types:
            self.method = MODALITIES[pepolar_types.pop()]
            _pe = set(f.metadata["PhaseEncodingDirection"]
                      for f in self.sources)
            if len(_pe) == 1:
                raise ValueError(
                    f"Only one phase-encoding direction <{_pe.pop()}> found across sources."
                )
        elif anat_types:
            self.method = MODALITIES[anat_types.pop()]

            if not pepolar_types:
                raise ValueError(
                    "Only anatomical sources were found, cannot estimate fieldmap."
                )

        if self.method == EstimatorType.UNKNOWN:
            # No method has been identified -> fail.
            raise ValueError("Insufficient sources to estimate a fieldmap.")

        intents_meta = set(
            el for f in self.sources
            for el in listify(f.metadata.get("IntendedFor") or []))

        # Register this estimation method
        if not self.bids_id:
            # If not manually set, try to get it from BIDS metadata
            bids_ids = set([
                f.metadata.get("B0FieldIdentifier") for f in self.sources
                if f.metadata.get("B0FieldIdentifier")
            ])
            if len(bids_ids) > 1:
                raise ValueError(
                    f"Multiple ``B0FieldIdentifier`` set: <{', '.join(bids_ids)}>"
                )
            elif bids_ids:
                object.__setattr__(self, "bids_id", bids_ids.pop())
            else:
                bids_id = _estimators.add(self.paths())
                object.__setattr__(self, "bids_id", bids_id)
                for intent_file in intents_meta:
                    _intents[intent_file].add(bids_id)
                return

        _estimators[self.bids_id] = self.paths()
        for intent_file in intents_meta:
            _intents[intent_file].add(self.bids_id)
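The decision tree above boils down to a mapping from the set of available suffixes to an estimation strategy. Here is a minimal, self-contained sketch of that suffix-based classification; classify_fieldmap is a hypothetical helper that returns plain string labels instead of sdcflows' EstimatorType values and skips the file-completion and registration steps.

def classify_fieldmap(suffixes):
    """Classify BIDS suffixes into a fieldmap estimation strategy (sketch)."""
    suffix_set = set(suffixes)

    # Option 1: explicit field-mapping acquisitions
    fmap_types = suffix_set & {"fieldmap", "phasediff", "phase1", "phase2"}
    if len(fmap_types) > 1 and fmap_types - {"phase1", "phase2"}:
        raise TypeError(f"Incompatible suffixes found: <{','.join(fmap_types)}>.")
    if "fieldmap" in fmap_types:
        return "mapped"      # a B0 map was acquired directly
    if fmap_types:
        return "phasediff"   # phase-difference (or two-phase) estimation

    # Option 2: PEPOLAR requires >1 EPI-like series and no anatomical reference
    epi_like = [s for s in suffixes if s in ("bold", "dwi", "epi", "sbref")]
    anat_types = suffix_set & {"T1w", "T2w"}
    if len(epi_like) > 1 and not anat_types:
        return "pepolar"
    if anat_types and epi_like:
        return "anat"        # fieldmap-less (anatomically-driven) estimation

    raise ValueError("Insufficient sources to estimate a fieldmap.")


print(classify_fieldmap(["magnitude1", "magnitude2", "phasediff"]))  # phasediff
print(classify_fieldmap(["epi", "bold"]))                            # pepolar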
Example #33
def _load_tsv_variables(layout,
                        suffix,
                        dataset=None,
                        columns=None,
                        prepend_type=False,
                        scope='all',
                        **selectors):
    """Reads variables from scans.tsv, sessions.tsv, and participants.tsv.

    Parameters
    ----------
    layout : :obj:`bids.layout.BIDSLayout`
        The BIDSLayout to use.
    suffix : str
        The suffix of the files to read from. Must be one of 'scans',
        'sessions', or 'participants'.
    dataset : NodeIndex
        A BIDS NodeIndex container. If None, a new one is
        initialized.
    columns : list
        Optional list of names specifying which columns in the
        files to return. If None, all columns are returned.
    prepend_type : bool
        If True, variable names are prepended with the
        type name (e.g., 'age' becomes 'participants.age').
    scope : str or list
        The scope to search for variables in. See the BIDSLayout docstring
        for details and valid predefined values.
    selectors : dict
        Optional keyword arguments passed onto the
        BIDSLayout instance's get() method; can be used to constrain
        which data are loaded.

    Returns
    -------
    A NodeIndex instance.
    """

    # Sanitize the selectors: only keep entities at current level or above
    remap = {'scans': 'run', 'sessions': 'session', 'participants': 'subject'}
    level = remap[suffix]
    valid_entities = BASE_ENTITIES[:BASE_ENTITIES.index(level)]
    layout_kwargs = {k: v for k, v in selectors.items() if k in valid_entities}

    if dataset is None:
        dataset = NodeIndex()

    files = layout.get(extension='.tsv',
                       suffix=suffix,
                       scope=scope,
                       **layout_kwargs)

    for f in files:

        _data = f.get_df(include_timing=False)

        # Entities can be defined either within the first column of the .tsv
        # file (for entities that vary by row), or from the full file path
        # (for entities constant over all rows in the file). We extract both
        # and store them in the main DataFrame alongside other variables (as
        # they'll be extracted when the BIDSVariable is initialized anyway).
        for ent_name, ent_val in f.entities.items():
            if ent_name in ALL_ENTITIES:
                _data[ent_name] = ent_val

        # Handling is a bit more convoluted for scans.tsv, because the first
        # column contains the run filename, which we also need to parse.
        if suffix == 'scans':

            # Suffix is guaranteed to be present in each filename, so drop the
            # constant column with value 'scans' to make way for it and prevent
            # two 'suffix' columns.
            _data.drop(columns=['suffix'], inplace=True)

            image = _data['filename']
            _data = _data.drop('filename', axis=1)
            dn = f._dirname
            paths = [str(dn / p) for p in image.values]
            ent_recs = [
                dict(layout.files[p].entities) for p in paths
                if p in layout.files
            ]
            ent_cols = pd.DataFrame.from_records(ent_recs)

            # Remove entity columns found in both DFs
            dupes = list(set(ent_cols.columns) & set(_data.columns))
            to_drop = ['extension'] + dupes
            ent_cols.drop(columns=to_drop, inplace=True)

            _data = pd.concat([_data, ent_cols], axis=1, sort=True)

        # The BIDS spec requires ID columns to be named 'session_id', 'run_id',
        # etc., and IDs begin with entity prefixes (e.g., 'sub-01'). To ensure
        # consistent internal handling, we strip these suffixes and prefixes.
        elif suffix == 'sessions':
            _data = _data.rename(columns={'session_id': 'session'})
            _data['session'] = _data['session'].str.replace('ses-', '')
        elif suffix == 'participants':
            _data = _data.rename(columns={'participant_id': 'subject'})
            _data['subject'] = _data['subject'].str.replace('sub-', '')

        def make_patt(x, regex_search=False):
            patt = '%s' % x
            if isinstance(x, (int, float)):
                # allow for leading zeros if a number was specified
                # regardless of regex_search
                patt = '0*' + patt
            if not regex_search:
                patt = '^%s$' % patt
            return patt

        # Filter rows on all selectors
        comm_cols = list(set(_data.columns) & set(selectors.keys()))
        for col in comm_cols:
            ent_patts = [
                make_patt(x, regex_search=layout.regex_search)
                for x in listify(selectors.get(col))
            ]
            patt = '|'.join(ent_patts)

            _data = _data[_data[col].str.contains(patt)]

        level = {
            'scans': 'session',
            'sessions': 'subject',
            'participants': 'dataset'
        }[suffix]

        node = dataset.get_or_create_node(level, f.entities)

        ent_cols = list(set(ALL_ENTITIES) & set(_data.columns))
        amp_cols = list(set(_data.columns) - set(ent_cols))

        if columns is not None:
            amp_cols = list(set(amp_cols) & set(columns))

        for col_name in amp_cols:

            # Rename the value column to 'amplitude', as expected by SimpleVariable
            df = _data.loc[:, [col_name] + ent_cols]
            df.columns = ['amplitude'] + ent_cols

            if prepend_type:
                col_name = '%s.%s' % (suffix, col_name)

            node.add_variable(
                SimpleVariable(name=col_name, data=df, source=suffix))

    return dataset
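A hedged usage sketch of the loader above, assuming a BIDS dataset at a hypothetical local path and that the returned NodeIndex exposes its nodes via get_nodes; in practice this function is usually reached indirectly through BIDSLayout.get_collections rather than called directly.

from bids.layout import BIDSLayout

layout = BIDSLayout("/data/bids_dataset")  # hypothetical dataset path

# Load participant-level variables from participants.tsv, keeping only 'age'
# and prefixing variable names with the file type ('participants.age').
dataset = _load_tsv_variables(layout, "participants",
                              columns=["age"], prepend_type=True)

node = dataset.get_nodes(level="dataset")[0]
print(list(node.variables))  # e.g., ['participants.age']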
Example #34
def _flatten(inlist):
    from bids.utils import listify

    return [el for items in listify(inlist) for el in listify(items)]
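A quick illustration of the flattening behavior, which accepts scalars or lists nested one level deep:

# Scalars and singly-nested lists both flatten to a single flat list
assert _flatten([["a", "b"], "c"]) == ["a", "b", "c"]
assert _flatten("x") == ["x"]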