Beispiel #1
0
    def __init__(self, name, config, root):
        """
        A set of rules that applies to one or more directories
        within a Layout.

        Args:
            name (str): The name of the Domain.
            config (dict): The configuration dictionary that defines the
                entities and paths for the current domain.
            root (str, list): The root directory or directories to which the
                Domain's rules applies. Can be either a single path, or a list.
        """

        self.name = name
        self.config = config
        self.root = root

        # Containers populated later, as entities and files are indexed.
        self.entities = {}
        self.files = []
        self.path_patterns = []

        # Normalize the filtering regexes to lists; include and exclude are
        # mutually exclusive, so reject configs that define both.
        inc = config.get('include', [])
        exc = config.get('exclude', [])
        self.include, self.exclude = listify(inc), listify(exc)

        if self.include and self.exclude:
            raise ValueError("The 'include' and 'exclude' arguments cannot "
                             "both be set. Please pass at most one of these "
                             "for domain '%s'." % self.name)

        if 'default_path_patterns' in config:
            self.path_patterns += listify(config['default_path_patterns'])
Beispiel #2
0
    def __init__(self, config):
        """
        A set of rules that applies to one or more directories
        within a Layout.

        Args:
            config (dict): The configuration dictionary that defines the
                entities and paths for the current domain. Must contain a
                'name' key identifying the domain.
        """

        self.name = config['name']
        self.config = config

        # Containers populated later, as entities and files are indexed.
        self.entities = {}
        self.files = []

        # Normalize the filtering regexes to lists; include and exclude are
        # mutually exclusive, so reject configs that define both.
        inc = self.config.get('include', [])
        exc = self.config.get('exclude', [])
        self.include, self.exclude = listify(inc), listify(exc)

        if self.include and self.exclude:
            raise ValueError("The 'include' and 'exclude' arguments cannot "
                             "both be set. Please pass at most one of these "
                             "for domain '%s'." % self.name)

        self.path_patterns = listify(config.get('default_path_patterns', []))
Beispiel #3
0
    def _matches(self,
                 entities=None,
                 extensions=None,
                 domains=None,
                 regex_search=False):
        """
        Checks whether the file matches all of the passed entities and
        extensions.

        Args:
            entities (dict): A dictionary of entity names -> regex patterns.
            extensions (str, list): One or more file extensions to allow.
            domains (str, list): One or more domains the file must match.
            regex_search (bool): Whether to require exact match (False) or
                regex search (True) when comparing the query string to each
                entity.
        Returns:
            True if _all_ entities and extensions match; False otherwise.
        """
        if extensions is not None:
            if isinstance(extensions, six.string_types):
                extensions = [extensions]
            # One of the allowed extensions must terminate the filename.
            extensions = '(' + '|'.join(extensions) + ')$'
            if re.search(extensions, self.filename) is None:
                return False

        if domains is not None:
            domains = listify(domains)
            # The file must belong to at least one of the queried domains.
            if not set(self.domains) & set(domains):
                return False

        if entities is not None:

            # Hoisted out of the loop: the original redefined this closure
            # on every iteration, allocating a new function object each time.
            def make_patt(x):
                patt = '%s' % x
                if isinstance(x, (int, float)):
                    # allow for leading zeros if a number was specified
                    # regardless of regex_search
                    patt = '0*' + patt
                if not regex_search:
                    patt = '^%s$' % patt
                return patt

            for name, val in entities.items():

                # XOR: fail if the entity is tagged but the query explicitly
                # requires its absence (val is None), or vice versa.
                if (name not in self.tags) ^ (val is None):
                    return False

                if val is None:
                    continue

                # Any one of the listed query values may match.
                ent_patts = [make_patt(x) for x in listify(val)]
                patt = '|'.join(ent_patts)

                if re.search(patt, str(self.tags[name].value)) is None:
                    return False
        return True
Beispiel #4
0
    def __init__(self,
                 paths,
                 root=None,
                 validate=False,
                 index_associated=True,
                 include=None,
                 exclude=None,
                 absolute_paths=True,
                 **kwargs):
        """Initialize a BIDS layout over one or more project directories.

        Args:
            paths (str, list): Directories to index. Each entry may be a
                plain path (paired with the default 'bids' config) or a
                (path, domain(s)) tuple.
            root (str): Project root. Inferred when a single string path is
                passed and root is None.
            validate (bool): Stored on the instance; presumably consulted
                during indexing — TODO confirm against the superclass.
            index_associated (bool): Forwarded to BIDSValidator.
            include, exclude: Filtering arguments forwarded to the superclass.
            absolute_paths (bool): Forwarded to the superclass.
            kwargs (dict): Additional superclass arguments.
        """

        self.validator = BIDSValidator(index_associated=index_associated)
        self.validate = validate

        # Determine which configs to load
        conf_path = pathjoin(dirname(abspath(__file__)), 'config', '%s.json')
        all_confs = ['bids', 'derivatives']

        def map_conf(x):
            # Translate a known config name ('bids'/'derivatives') into the
            # path of its bundled JSON file; pass anything else through.
            if isinstance(x, six.string_types) and x in all_confs:
                return conf_path % x
            return x

        paths = listify(paths, ignore=list)

        for i, p in enumerate(paths):
            if isinstance(p, six.string_types):
                # Bare string path: pair it with the default 'bids' config.
                paths[i] = (p, conf_path % 'bids')
                if len(paths) == 1 and root is None:
                    root = p
            elif isinstance(p, tuple):
                # (path, domain(s)) tuple: resolve each named domain.
                doms = [map_conf(d) for d in listify(p[1])]
                paths[i] = (p[0], doms)

        # NOTE(review): falls back on the filesystem root when no root could
        # be inferred — so the dataset_description lookup below still has a
        # base to join against; confirm this is intended on Windows.
        self.root = '/' if root is None else root

        target = pathjoin(self.root, 'dataset_description.json')
        if not exists(target):
            warnings.warn("'dataset_description.json' file is missing from "
                          "project root. You may want to set the root path to "
                          "a valid BIDS project.")
            self.description = None
        else:
            self.description = json.load(open(target, 'r'))
            # Both fields are required by the BIDS spec for a valid dataset.
            for k in ['Name', 'BIDSVersion']:
                if k not in self.description:
                    raise ValueError("Mandatory '%s' field missing from "
                                     "dataset_description.json." % k)

        super(BIDSLayout, self).__init__(paths,
                                         root=root,
                                         dynamic_getters=True,
                                         include=include,
                                         exclude=exclude,
                                         absolute_paths=absolute_paths,
                                         **kwargs)
Beispiel #5
0
    def _index_file(self, root, f, domains, update_layout=True):
        """Index a single file under each of the named domains.

        Raises ValueError if any domain name is unknown; returns the
        created file object after registering it in self.files.
        """

        # Create the file object--allows for subclassing
        f = self._make_file_object(root, f)

        for dom_name in listify(domains):
            if dom_name not in self.domains:
                raise ValueError("Cannot index file '%s' in domain '%s'; "
                                 "no domain with that name exists." %
                                 (f.path, dom_name))
            dom = self.domains[dom_name]

            # Collect (entity, value) pairs for each matching entity; bail
            # out of the scan early if a mandatory entity fails to match.
            matched = {}
            for ent in dom.entities.values():
                value = ent.match_file(f)
                if value is None:
                    if ent.mandatory:
                        break
                    continue
                matched[ent.name] = (ent, value)

            for key, (ent, value) in matched.items():
                f.tags[key] = Tag(ent, value)
                if update_layout:
                    ent.add_file(f.path, value)

            if update_layout:
                dom.add_file(f)

        self.files[f.path] = f

        return f
Beispiel #6
0
    def _load_config(self, config):
        """Load (and possibly merge) a config, then register its contents.

        Args:
            config (str, dict, list): A path to a JSON config file, an
                already-loaded config dict, or a list of either (later
                entries override earlier ones on key conflicts).

        Returns:
            The loaded (or merged) config dict.

        Raises:
            ValueError: If the config defines both 'include' and 'exclude'
                index regexes.
        """
        if isinstance(config, six.string_types):
            # Context manager ensures the handle is closed; the original
            # json.load(open(...)) leaked the file handle.
            with open(config, 'r') as fh:
                config = json.load(fh)
        elif isinstance(config, list):
            merged = {}
            for c in config:
                if isinstance(c, six.string_types):
                    with open(c, 'r') as fh:
                        c = json.load(fh)
                merged.update(c)
            config = merged

        for e in config['entities']:
            self.add_entity(**e)

        if 'index' in config:
            self.filtering_regex = config['index']
            # The two filtering regexes are mutually exclusive.
            if self.filtering_regex.get('include') and \
               self.filtering_regex.get('exclude'):
                raise ValueError("You can only define either include or "
                                 "exclude regex, not both.")

        if 'default_path_patterns' in config:
            self.path_patterns += listify(config['default_path_patterns'])

        return config
Beispiel #7
0
    def _index_file(self, root, f, domains, update_layout=True):
        """Index a single file under each of the named domains and record
        the domain list on the file when the layout is being updated."""

        # Create the file object--allows for subclassing
        f = self._make_file_object(root, f)

        for dom_name in listify(domains):
            dom = self.domains[dom_name]

            # Map entity name -> (entity, matched value) for every entity
            # that matches; stop scanning if a mandatory entity misses.
            matched = {}
            for ent in dom.entities.values():
                value = ent.match_file(f)
                if value is None:
                    if ent.mandatory:
                        break
                    continue
                matched[ent.name] = (ent, value)

            for key, (ent, value) in matched.items():
                f.tags[key] = Tag(ent, value)
                if update_layout:
                    ent.add_file(f.path, value)

            if update_layout:
                dom.add_file(f)

        if update_layout:
            f.domains = domains

        self.files[f.path] = f

        return f
Beispiel #8
0
    def _check_inclusions(self, f, domains=None, fullpath=True):
        ''' Check file or directory against regexes in config to determine if
            it should be included in the index

        Args:
            f (str, File): The filename (or File object) to check.
            domains (str, list): Names of domains whose include/exclude
                rules should be applied. If None, all domains are checked.
            fullpath (bool): If False, only the basename is matched.

        Returns:
            True if the file passes every applicable filter; False otherwise.
        '''

        filename = f if isinstance(f, six.string_types) else f.path

        if not fullpath:
            filename = basename(filename)

        if domains is None:
            domains = list(self.domains.keys())

        domains = [self.domains[dom] for dom in listify(domains)]

        # Inject the Layout at the first position for global include/exclude
        domains.insert(0, self)
        for dom in domains:
            # If file matches any include regex, then True
            if dom.include:
                for regex in dom.include:
                    # Pass re.UNICODE for consistency with the exclude branch
                    # below (the original omitted it here).
                    if re.search(regex, filename, flags=re.UNICODE):
                        return True
                return False
            else:
                # If file matches any exclude regex, then False
                for regex in dom.exclude:
                    if re.search(regex, filename, flags=re.UNICODE):
                        return False
        return True
Beispiel #9
0
    def __init__(self,
                 name,
                 pattern=None,
                 domain=None,
                 mandatory=False,
                 directory=None,
                 map_func=None,
                 dtype=None,
                 aliases=None,
                 **kwargs):
        """
        Represents a single entity defined in the JSON config.

        Args:
            name (str): The name of the entity (e.g., 'subject', 'run', etc.)
            pattern (str): A regex pattern used to match against file names.
                Must define at least one group, and only the first group is
                kept as the match.
            domain (Domain): The Domain the Entity belongs to.
            mandatory (bool): If True, every File _must_ match this entity.
            directory (str): Optional pattern defining a directory associated
                with the entity.
            map_func (callable): Optional callable used to extract the Entity's
                value from the passed string (instead of trying to match on the
                defined .pattern).
            dtype (str): The optional data type of the Entity values. Must be
                one of 'int', 'float', 'bool', or 'str'. If None, no type
                enforcement will be attempted, which means the dtype of the
                value may be unpredictable.
            aliases (str or list): Alternative names for the entity.
            kwargs (dict): Additional keyword arguments.

        Raises:
            ValueError: If neither pattern nor map_func is given, or if
                dtype is not one of the supported types.
        """
        if pattern is None and map_func is None:
            raise ValueError("Invalid specification for Entity '%s'; no "
                             "pattern or mapping function provided. Either the"
                             " 'pattern' or the 'map_func' arguments must be "
                             "set." % name)
        self.name = name
        self.pattern = pattern
        self.domain = domain
        self.mandatory = mandatory
        self.directory = directory
        self.map_func = map_func
        self.kwargs = kwargs

        if isinstance(dtype, six.string_types):
            # Resolve the name through an explicit whitelist instead of
            # eval(), which would execute arbitrary expressions coming from
            # a config file. Unknown names fall through to the check below.
            dtype = {'str': str, 'float': float,
                     'int': int, 'bool': bool}.get(dtype, dtype)
        if dtype not in [str, float, int, bool, None]:
            raise ValueError("Invalid dtype '%s'. Must be one of int, float, "
                             "bool, or str." % dtype)
        self.dtype = dtype

        self.files = {}
        # Pre-compile the matching regex once, if a pattern was given.
        self.regex = re.compile(pattern) if pattern is not None else None
        domain_name = getattr(domain, 'name', '')
        self.id = '.'.join([domain_name, name])
        aliases = [] if aliases is None else listify(aliases)
        self.aliases = ['.'.join([domain_name, alias]) for alias in aliases]
Beispiel #10
0
 def _get_domains_for_file(self, f):
     """Return the names of all domains whose root contains the file path."""
     if isinstance(f, File):
         # File objects already carry their resolved domain list.
         return f.domains
     matched = []
     for dom in self.domains.values():
         # A single matching root is enough to claim the file.
         if any(f.startswith(r) for r in listify(dom.root)):
             matched.append(dom.name)
     return matched
Beispiel #11
0
    def __call__(cls, path, *args, **kwargs):
        """Construct one Layout per path; merge them when several are given.

        A single path produces an ordinary instance; multiple paths produce
        one instance each, combined via merge_layouts().
        """
        paths = listify(path)
        if len(paths) == 1:
            # Pass the original argument through untouched.
            return super(LayoutMetaclass, cls).__call__(path, *args, **kwargs)
        layouts = [super(LayoutMetaclass, cls).__call__(p, *args, **kwargs)
                   for p in paths]
        return merge_layouts(layouts)
Beispiel #12
0
    def __init__(self,
                 path,
                 config=None,
                 validate=False,
                 index_associated=True,
                 include=None,
                 exclude=None,
                 **kwargs):
        """Initialize the layout, resolving and merging config specifications.

        Args:
            path (str, list): Path(s) to the project directory(ies).
            config (str, dict, list, tuple): Config specification(s). Each
                entry may be a known config name ('bids'/'derivatives'), a
                path to a JSON config file, a config dict, or a (config,
                overrides-or-root) tuple. Defaults to ['bids', 'derivatives'].
            validate (bool): Stored on the instance for later validation.
            index_associated (bool): Forwarded to BIDSValidator.
            include, exclude: Filtering arguments forwarded to the superclass.
            kwargs (dict): Additional superclass arguments.
        """
        self.validator = BIDSValidator(index_associated=index_associated)
        self.validate = validate

        # Determine which configs to load
        conf_path = pathjoin(dirname(abspath(__file__)), 'config', '%s.json')
        _all_doms = ['bids', 'derivatives']
        if config is None:
            config = ['bids', 'derivatives']

        configs = []

        def _load_config(conf):
            # Resolve known domain names to their bundled JSON file, then
            # load. Context manager avoids the original's file-handle leak.
            if isinstance(conf, six.string_types):
                if conf in _all_doms:
                    conf = conf_path % conf
                with open(conf, 'r') as fh:
                    conf = json.load(fh)
            return conf

        for conf in listify(config):
            if isinstance(conf, tuple):
                # (config, dict) applies overrides; (config, str) sets root.
                _conf = _load_config(conf[0]).copy()
                if isinstance(conf[1], dict):
                    _conf.update(conf[1])
                else:
                    _conf['root'] = conf[1]
                configs.append(_conf)
            else:
                configs.append(_load_config(conf))

        # If 'bids' isn't in the list, the user probably made a mistake...
        # (Fixed: the original tested `not any(name != 'bids')`, which warned
        # precisely when every config WAS 'bids' — the inverse of the intent.)
        if not any([c.get('name') == 'bids' for c in configs]):
            warnings.warn("The core BIDS configuration was not included in the"
                          " config list. If you override the default value for"
                          " config, you probably want to make sure 'bids' is "
                          "included in the list of values.")

        super(BIDSLayout, self).__init__(path,
                                         config=configs,
                                         dynamic_getters=True,
                                         include=include,
                                         exclude=exclude,
                                         **kwargs)
Beispiel #13
0
    def add_derivatives(self, path, **kwargs):
        ''' Add BIDS-Derivatives datasets to tracking.

        Args:
            path (str, list): One or more paths to BIDS-Derivatives datasets.
                Each path can point to either a derivatives/ directory
                containing one more more pipeline directories, or to a single
                pipeline directory (e.g., derivatives/fmriprep).
            kwargs (dict): Optional keyword arguments to pass on to
                BIDSLayout() when initializing each of the derivative datasets.

        Raises:
            ValueError: If a dataset lacks PipelineDescription.Name, or if
                a pipeline name was already registered.
        '''
        paths = listify(path)
        deriv_dirs = []

        # Collect all paths that contain a dataset_description.json
        def check_for_description(dir):
            dd = os.path.join(dir, 'dataset_description.json')
            return os.path.exists(dd)

        for p in paths:
            p = os.path.abspath(p)
            if check_for_description(p):
                deriv_dirs.append(p)
            else:
                # Not itself a dataset; scan immediate subdirectories
                # (e.g., a derivatives/ folder holding pipeline dirs).
                subdirs = [
                    d for d in os.listdir(p)
                    if os.path.isdir(os.path.join(p, d))
                ]
                for sd in subdirs:
                    sd = os.path.join(p, sd)
                    if check_for_description(sd):
                        deriv_dirs.append(sd)

        for deriv in deriv_dirs:
            dd = os.path.join(deriv, 'dataset_description.json')
            # Context manager avoids the original's file-handle leak.
            with open(dd, 'r') as ddfd:
                description = json.load(ddfd)
            # Fixed: 'PipelineDescription.Name' is a *nested* JSON field, so
            # a flat .get() on the dotted key could never find it.
            pipeline_name = description.get('PipelineDescription',
                                            {}).get('Name', None)
            if pipeline_name is None:
                raise ValueError("Every valid BIDS-derivatives dataset must "
                                 "have a PipelineDescription.Name field set "
                                 "inside dataset_description.json.")
            if pipeline_name in self.derivatives:
                # Fixed: the original never supplied the '%s' argument.
                raise ValueError(("Pipeline name '%s' has already been added "
                                  "to this BIDSLayout. Every added pipeline "
                                  "must have a unique name!") % pipeline_name)
            # Default config and sources values
            kwargs['config'] = kwargs.get('config') or ['bids', 'derivatives']
            kwargs['sources'] = kwargs.get('sources') or self
            self.derivatives[pipeline_name] = BIDSLayout(deriv, **kwargs)
Beispiel #14
0
 def _get_fieldmaps(self, path):
     """Collect fieldmap file groupings that declare *path* as their target.

     Scans this subject's phase1/phasediff/epi/fieldmap images and, for each
     whose JSON metadata lists *path* under 'IntendedFor', builds a dict
     mapping fieldmap roles to file paths (plus a 'suffix' key naming the
     fieldmap type).

     Args:
         path (str): Path of the target image. Assumes the filename starts
             with a 'sub-<label>_' segment, from which the subject is parsed.

     Returns:
         list: One dict per matching fieldmap acquisition.
     """
     # Extract the subject label from the filename ('sub-XX_...').
     sub = os.path.split(path)[1].split("_")[0].split("sub-")[1]
     fieldmap_set = []
     suffix = '(phase1|phasediff|epi|fieldmap)'
     files = self.get(subject=sub,
                      suffix=suffix,
                      extensions=['nii.gz', 'nii'])
     for file in files:
         metadata = self.get_metadata(file.path)
         if metadata and "IntendedFor" in metadata.keys():
             intended_for = listify(metadata["IntendedFor"])
             # Only keep fieldmaps that explicitly target this path.
             if any([path.endswith(_suff) for _suff in intended_for]):
                 cur_fieldmap = {}
                 if file.suffix == "phasediff":
                     # Companion magnitude files share the filename apart
                     # from the suffix, so derive them by substitution.
                     cur_fieldmap = {
                         "phasediff":
                         file.path,
                         "magnitude1":
                         file.path.replace("phasediff", "magnitude1"),
                         "suffix":
                         "phasediff"
                     }
                     magnitude2 = file.path.replace("phasediff",
                                                    "magnitude2")
                     # magnitude2 is optional; include it only if present.
                     if os.path.isfile(magnitude2):
                         cur_fieldmap['magnitude2'] = magnitude2
                 elif file.suffix == "phase1":
                     cur_fieldmap["phase1"] = file.path
                     cur_fieldmap["magnitude1"] = \
                         file.path.replace("phase1", "magnitude1")
                     cur_fieldmap["phase2"] = \
                         file.path.replace("phase1", "phase2")
                     cur_fieldmap["magnitude2"] = \
                         file.path.replace("phase1", "magnitude2")
                     cur_fieldmap["suffix"] = "phase"
                 elif file.suffix == "epi":
                     cur_fieldmap["epi"] = file.path
                     cur_fieldmap["suffix"] = "epi"
                 elif file.suffix == "fieldmap":
                     cur_fieldmap["fieldmap"] = file.path
                     cur_fieldmap["magnitude"] = \
                         file.path.replace("fieldmap", "magnitude")
                     cur_fieldmap["suffix"] = "fieldmap"
                 fieldmap_set.append(cur_fieldmap)
     return fieldmap_set
Beispiel #15
0
    def _load_domain(self, config, root=None, from_init=False):
        """Load a domain config (from a file or dict) and register the Domain.

        Args:
            config (str, dict): Path to a JSON config file (or to a directory
                containing one named self.config_filename), or an
                already-loaded config dict.
            root (str): Root directory for the domain. When None, it is
                inferred from the config file's location, or falls back on
                the Layout root when called from __init__.
            from_init (bool): Whether this call originates from __init__,
                which changes how a missing root is resolved.

        Returns:
            The newly created Domain.

        Raises:
            ValueError: If the config file cannot be found, lacks a 'name',
                the name is already registered, or a root directory does
                not exist.
        """

        if isinstance(config, six.string_types):

            if isdir(config):
                config = join(config, self.config_filename)

            if not exists(config):
                raise ValueError("Config file '%s' cannot be found." % config)

            config_filename = config
            # Context manager ensures the handle is closed; the original
            # json.load(open(...)) leaked the file handle.
            with open(config, 'r') as fh:
                config = json.load(fh)

            if root is None and not from_init:
                root = dirname(abspath(config_filename))

        if 'name' not in config:
            raise ValueError("Config file missing 'name' attribute.")

        if config['name'] in self.domains:
            raise ValueError("Config with name '%s' already exists in "
                             "Layout. Name of each config file must be "
                             "unique across entire Layout." % config['name'])

        if root is None and from_init:
            # Fall back on the Layout's own root directory.
            root = self.root

        if config.get('root') in [None, '.']:
            config['root'] = root

        # Distinct loop variable: the original reused (and clobbered) 'root'.
        for dom_root in listify(config['root']):
            if not exists(dom_root):
                raise ValueError("Root directory %s for domain %s does not "
                                 "exist!" % (dom_root, config['name']))

        # Load entities
        domain = Domain(config['name'], config)
        for e in config.get('entities', []):
            self.add_entity(domain=domain, **e)

        self.domains[domain.name] = domain
        return domain
Beispiel #16
0
    def build_path(self,
                   source,
                   path_patterns=None,
                   strict=False,
                   domains=None):
        ''' Constructs a target filename for a file or dictionary of entities.

        Args:
            source (str, File, dict): The source data used to construct the
                new file path. One of:
                - A File object
                - The path of a File tracked by the current Layout
                - A dict mapping entity names to values
            path_patterns (list): Optional path patterns to use. When None,
                the patterns defined on the Layout's domains are used.
            strict (bool): If True, all entities must be matched inside a
                pattern for it to be a valid match; if False, extra entities
                are ignored as long as all mandatory ones are found.
            domains (str, list): Optional name(s) of domain(s) to scan for
                path patterns. If None, all domains are scanned. With two or
                more domains, earlier entries take precedence.
        '''

        # Resolve a string path into the File object tracked by the Layout.
        if isinstance(source, six.string_types):
            if source not in self.files:
                source = join(self.root, source)
            source = self.get_file(source)

        # Reduce a File object to its entity dict.
        if isinstance(source, File):
            source = source.entities

        # Fall back on the domains' own patterns when none were supplied;
        # domain order determines pattern precedence.
        if path_patterns is None:
            if domains is None:
                domains = list(self.domains.keys())
            path_patterns = [patt for dom in listify(domains)
                             for patt in self.domains[dom].path_patterns]

        return build_path(source, path_patterns, strict)
Beispiel #17
0
    def add_derivatives(self, path, **kwargs):
        ''' Add BIDS-Derivatives datasets to tracking.

        Args:
            path (str, list): One or more paths to BIDS-Derivatives datasets.
                Each path can point to either a derivatives/ directory
                containing one more more pipeline directories, or to a single
                pipeline directory (e.g., derivatives/fmriprep).
            kwargs (dict): Optional keyword arguments to pass on to
                BIDSLayout() when initializing each of the derivative datasets.

        Raises:
            ValueError: If a dataset lacks PipelineDescription.Name, or if
                a pipeline name was already registered.
        '''
        paths = listify(path)
        deriv_dirs = []

        # Collect all paths that contain a dataset_description.json
        def check_for_description(dir):
            dd = os.path.join(dir, 'dataset_description.json')
            return os.path.exists(dd)

        for p in paths:
            p = os.path.abspath(p)
            if os.path.exists(p):
                if check_for_description(p):
                    deriv_dirs.append(p)
                else:
                    # Not itself a dataset; scan immediate subdirectories
                    # (e.g., a derivatives/ folder holding pipeline dirs).
                    subdirs = [
                        d for d in os.listdir(p)
                        if os.path.isdir(os.path.join(p, d))
                    ]
                    for sd in subdirs:
                        sd = os.path.join(p, sd)
                        if check_for_description(sd):
                            deriv_dirs.append(sd)

        local_entities = set(ent.name for ent in self.entities.values())
        # Hoisted: building an inflect engine is loop-invariant; the original
        # constructed a fresh engine for every new entity.
        pluralize = inflect.engine().plural
        for deriv in deriv_dirs:
            dd = os.path.join(deriv, 'dataset_description.json')
            with open(dd, 'r', encoding='utf-8') as ddfd:
                description = json.load(ddfd)
            pipeline_name = description.get('PipelineDescription',
                                            {}).get('Name', None)
            if pipeline_name is None:
                raise ValueError("Every valid BIDS-derivatives dataset must "
                                 "have a PipelineDescription.Name field set "
                                 "inside dataset_description.json.")
            if pipeline_name in self.derivatives:
                # Fixed: the original never supplied the '%s' argument.
                raise ValueError(("Pipeline name '%s' has already been added "
                                  "to this BIDSLayout. Every added pipeline "
                                  "must have a unique name!") % pipeline_name)
            # Default config and sources values
            kwargs['config'] = kwargs.get('config') or ['bids', 'derivatives']
            kwargs['sources'] = kwargs.get('sources') or self
            self.derivatives[pipeline_name] = BIDSLayout(deriv, **kwargs)

            # Propagate derivative entities into top-level dynamic getters
            deriv_entities = set(
                ent.name
                for ent in self.derivatives[pipeline_name].entities.values())
            for deriv_ent in deriv_entities - local_entities:
                local_entities.add(deriv_ent)
                getter = 'get_' + pluralize(deriv_ent)
                if not hasattr(self, getter):
                    func = partial(self.get,
                                   target=deriv_ent,
                                   return_type='id')
                    setattr(self, getter, func)
Beispiel #18
0
    def get(self,
            return_type='object',
            target=None,
            extensions=None,
            derivatives=True,
            regex_search=None,
            defined_fields=None,
            domains=None,
            **kwargs):
        """
        Retrieve files and/or metadata from the current Layout.

        Args:
            return_type (str): Type of result to return. Valid values:
                'object' (default): return a list of matching BIDSFile objects.
                'file': return a list of matching filenames.
                'dir': return a list of directories.
                'id': return a list of unique IDs. Must be used together with
                    a valid target.
            target (str): Optional name of the target entity to get results for
                (only used if return_type is 'dir' or 'id').
            extensions (str, list): One or more file extensions to filter on.
                Files with any other extensions will be excluded.
            derivatives (bool, str, list): Whether/how to search associated
                BIDS-Derivatives datasets. If True (default), all available
                derivatives are searched. If a str or list, must be the name(s)
                of the derivatives to search (as defined in the
                PipelineDescription.Name field in dataset_description.json).
            regex_search (bool or None): Whether to require exact matching
                (False) or regex search (True) when comparing the query string
                to each entity. If None (default), uses the value found in
                self.
            defined_fields (list): Optional list of names of metadata fields
                that must be defined in JSON sidecars in order to consider the
                file a match, but which don't need to match any particular
                value.
            domains (str, list): Domain(s) to search in. Valid values are
                'bids' and 'derivatives'.
            kwargs (dict): Any optional key/values to filter the entities on.
                Keys are entity names, values are regexes to filter on. For
                example, passing filter={ 'subject': 'sub-[12]'} would return
                only files that match the first two subjects.

        Returns:
            A list of BIDSFile (default) or other objects
            (see return_type for details).
        """

        # Warn users still expecting 0.6 behavior
        if 'type' in kwargs:
            raise ValueError("As of pybids 0.7.0, the 'type' argument has been"
                             " replaced with 'suffix'.")

        # Normalize derivatives to a (possibly empty) list of pipeline names.
        if derivatives is True:
            derivatives = list(self.derivatives.keys())
        elif derivatives:
            derivatives = listify(derivatives)

        # Separate entity kwargs from metadata kwargs
        ent_kwargs, md_kwargs = {}, {}

        all_ents = self.get_domain_entities()
        if derivatives:
            for deriv in derivatives:
                deriv_ents = self.derivatives[deriv].get_domain_entities()
                all_ents.update(deriv_ents)

        # Any kwarg that is not a known entity is treated as a metadata
        # field to search for in the JSON sidecars.
        for k, v in kwargs.items():
            if k in all_ents:
                ent_kwargs[k] = v
            else:
                md_kwargs[k] = v

        # Provide some suggestions if target is specified and invalid.
        if target is not None and target not in all_ents:
            import difflib
            potential = list(all_ents.keys())
            suggestions = difflib.get_close_matches(target, potential)
            if suggestions:
                message = "Did you mean one of: {}?".format(suggestions)
            else:
                message = "Valid targets are: {}".format(potential)
            raise ValueError(
                ("Unknown target '{}'. " + message).format(target))

        all_results = []

        # Get entity-based search results using the superclass's get()
        # (removed a dead 'result = []' that was immediately overwritten).
        result = super(BIDSLayout, self).get(return_type,
                                             target=target,
                                             extensions=extensions,
                                             domains=None,
                                             regex_search=regex_search,
                                             **ent_kwargs)

        # Search the metadata if needed
        if return_type not in {'dir', 'id'}:

            if md_kwargs:
                # The metadata index searches over paths, so convert
                # objects to paths and back around the search.
                if return_type.startswith('obj'):
                    result = [f.path for f in result]

                result = self.metadata_index.search(result, defined_fields,
                                                    **md_kwargs)

                if return_type.startswith('obj'):
                    result = [self.files[f] for f in result]

        all_results.append(result)

        # Add results from derivatives
        if derivatives:
            for deriv in derivatives:
                deriv = self.derivatives[deriv]
                deriv_res = deriv.get(return_type, target, extensions, None,
                                      regex_search, **ent_kwargs)
                all_results.append(deriv_res)

        # Flatten results
        result = list(chain(*all_results))
        if return_type in ['dir', 'id']:
            # Deduplicate values that may appear in multiple sources.
            result = list(set(result))

        return result
Beispiel #19
0
    def __init__(self,
                 paths,
                 root=None,
                 index=None,
                 dynamic_getters=False,
                 absolute_paths=True,
                 regex_search=False,
                 entity_mapper=None,
                 path_patterns=None,
                 config_filename='layout.json',
                 include=None,
                 exclude=None):
        """
        A container for all the files and metadata found at the specified path.

        Args:
            paths (str, list): The path(s) where project files are located.
                Must be one of:

                - A path to a directory containing files to index
                - A list of paths to directories to index
                - A list of 2-tuples where each tuple encodes a mapping from
                  directories to domains. The first element is a string or
                  list giving the paths to one or more directories to index.
                  The second element specifies which domains to apply to the
                  specified files, and can be one of:
                    * A string giving the path to a JSON config file
                    * A dictionary containing config information
                    * A list of any combination of strings or dicts

            root (str): Optional directory that all other paths will be
                relative to. If set, every other path the Layout sees must be
                at this level or below. If None, filesystem root ('/') is used.
            index (str): Optional path to a saved index file. If a valid value
                is passed, this index is used to populate Files and Entities,
                and the normal indexing process (which requires scanning all
                files in the project) is skipped.
            dynamic_getters (bool): If True, a get_{entity_name}() method will
                be dynamically added to the Layout every time a new Entity is
                created. This is implemented by creating a partial function of
                the get() function that sets the target argument to the
                entity name.
            absolute_paths (bool): If True, grabbit uses absolute file paths
                everywhere (including when returning query results). If False,
                the input path will determine the behavior (i.e., relative if
                a relative path was passed, absolute if an absolute path was
                passed).
            regex_search (bool): Whether to require exact matching (True)
                or regex search (False, default) when comparing the query
                string to each entity in .get() calls. This sets a default for
                the instance, but can be overridden in individual .get()
                requests.
            entity_mapper (object, str): An optional object containing methods
                for indexing specific entities. If passed, the object must
                contain a named method for every value that appears in the
                JSON config file under the "mapper" key of an Entity's entry.
                For example, if an entity "type" is defined that contains the
                key/value pair "mapper": "extract_type", then the passed object
                must contain an .extract_type() method.
                    Alternatively, the special string "self" can be passed, in
                which case the current Layout instance will be used as the
                entity mapper (implying that the user has subclassed Layout).
            path_patterns (str, list): One or more filename patterns to use
                as a default path pattern for this layout's files.  Can also
                be specified in the config file.
            config_filename (str): The name of directory-specific config files.
                Every directory will be scanned for this file, and if found,
                the config file will be read in and added to the list of
                configs.
            include (str, list): A string or list specifying regexes used to
                globally filter files when indexing. A file or directory
                *must* match at least one of the passed values in order to be
                retained in the index. Cannot be used together with 'exclude'.
            exclude (str, list): A string or list specifying regexes used to
                globally filter files when indexing. If a file or directory
                matches any of the passed values, it will be dropped from
                indexing. Cannot be used together with 'include'.

        Raises:
            ValueError: If both 'include' and 'exclude' are passed, or if any
                of the directories to index does not exist.
        """

        # include/exclude are mutually exclusive filters; fail fast rather
        # than silently preferring one over the other.
        if include is not None and exclude is not None:
            raise ValueError("You cannot specify both the include and exclude"
                             " arguments. Please pass at most one of these.")

        self.entities = OrderedDict()
        self.files = {}
        self.mandatory = set()
        self.dynamic_getters = dynamic_getters
        self.regex_search = regex_search
        # The special string 'self' lets subclasses act as their own mapper.
        self.entity_mapper = self if entity_mapper == 'self' else entity_mapper
        self.path_patterns = path_patterns if path_patterns else []
        self.config_filename = config_filename
        self.domains = OrderedDict()
        self.include = listify(include or [])
        self.exclude = listify(exclude or [])
        self.absolute_paths = absolute_paths
        if root is None:
            root = '/'
        self.root = abspath(root)

        self._domain_map = {}

        # Extract path --> domain mapping
        self._paths_to_index = {}

        def add_path(path, val):
            # Normalize to an absolute path so later lookups are consistent.
            path = abspath(path)
            self._paths_to_index[path] = val

        # ignore=list keeps 2-tuples wrapped in a list intact so the
        # (paths, domains) structure can be detected below.
        for p in listify(paths, ignore=list):
            if isinstance(p, six.string_types):
                # Bare path string: no explicit domains attached.
                add_path(p, [])
            else:
                # (paths, domains) tuple: resolve each domain spec now.
                doms = listify(p[1])
                doms = [self._get_or_load_domain(d) for d in doms]
                for elem in listify(p[0]):
                    add_path(elem, doms)

        # Verify existence of all paths
        for p in self._paths_to_index:
            if not exists(p):
                raise ValueError("Search path {} doesn't exist.".format(p))

        # Either scan the filesystem, or restore a previously saved index.
        if index is None:
            self.index()
        else:
            self.load_index(index)
Beispiel #20
0
    def __init__(self,
                 paths,
                 root=None,
                 validate=False,
                 index_associated=True,
                 include=None,
                 exclude=None,
                 absolute_paths=True,
                 **kwargs):
        """
        A BIDS-aware Layout built on top of the base Layout.

        Args:
            paths (str, list): Path(s) to the BIDS project directory/ies.
                Each element may be a bare path string (in which case the
                default 'bids' config is applied) or a (path, config) tuple.
            root (str): Optional common root directory. If None, the longest
                common parent of all passed paths is used.
            validate (bool): Whether to validate files against the BIDS spec.
            index_associated (bool): Passed to BIDSValidator; whether to
                index files associated with data files (e.g., sidecars).
            include (str, list): Regex(es) a file must match to be indexed.
                Cannot be used together with 'exclude'.
            exclude (str, list): Regex(es) that drop matching files from the
                index. Cannot be used together with 'include'.
            absolute_paths (bool): Whether to report absolute file paths.
            kwargs: Additional keyword arguments passed to the base Layout.

        Raises:
            ValueError: If no common parent directory can be found for the
                passed paths, or a mandatory field is missing from
                dataset_description.json.
        """

        self.validator = BIDSValidator(index_associated=index_associated)
        self.validate = validate

        # Determine which configs to load. Known config names map to JSON
        # files shipped in the package's 'config' directory.
        conf_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'config', '%s.json')
        all_confs = ['bids', 'derivatives']

        def map_conf(x):
            # Expand a known config name to its packaged JSON file path;
            # pass anything else (explicit paths, dicts) through unchanged.
            if isinstance(x, six.string_types) and x in all_confs:
                return conf_path % x
            return x

        paths = listify(paths, ignore=list)

        # Normalize every element to a (path, config) tuple.
        for i, p in enumerate(paths):
            if isinstance(p, six.string_types):
                # Bare path: apply the default 'bids' config.
                paths[i] = (p, conf_path % 'bids')
            elif isinstance(p, tuple):
                doms = [map_conf(d) for d in listify(p[1])]
                paths[i] = (p[0], doms)

        # Set root to longest valid common parent if it isn't explicitly set
        if root is None:
            abs_paths = [os.path.abspath(p[0]) for p in paths]
            root = commonpath(abs_paths)
            if not root:
                raise ValueError("One or more invalid paths passed; could not "
                                 "find a common parent directory of %s. Either"
                                 " make sure the paths are correct, or "
                                 "explicitly set the root using the 'root' "
                                 "argument." % abs_paths)

        self.root = root

        target = os.path.join(self.root, 'dataset_description.json')
        if not os.path.exists(target):
            warnings.warn("'dataset_description.json' file is missing from "
                          "project root. You may want to set the root path to "
                          "a valid BIDS project.")
            self.description = None
        else:
            # Use a context manager so the file handle is closed promptly
            # (the original json.load(open(...)) leaked the handle).
            with open(target, 'r') as fobj:
                self.description = json.load(fobj)
            for k in ['Name', 'BIDSVersion']:
                if k not in self.description:
                    raise ValueError("Mandatory '%s' field missing from "
                                     "dataset_description.json." % k)

        super(BIDSLayout, self).__init__(paths,
                                         root=root,
                                         dynamic_getters=True,
                                         include=include,
                                         exclude=exclude,
                                         absolute_paths=absolute_paths,
                                         **kwargs)
Beispiel #21
0
    def __init__(self,
                 path,
                 config=None,
                 validate=False,
                 index_associated=True,
                 include=None,
                 exclude=None,
                 **kwargs):
        """
        A BIDS-aware Layout built on top of the base Layout.

        Args:
            path (str): Path to the BIDS project root directory.
            config (str, dict, tuple, list): Configuration specification(s).
                Strings matching a known config name ('bids', 'derivatives')
                are resolved to the packaged JSON config files; other strings
                are treated as paths to JSON config files. A (config, extra)
                tuple either updates the loaded config with a dict or sets
                its 'root'. Defaults to ['bids', 'derivatives'].
            validate (bool): Whether to validate files against the BIDS spec.
            index_associated (bool): Passed to BIDSValidator; whether to
                index files associated with data files (e.g., sidecars).
            include (str, list): Regex(es) a file must match to be indexed.
                Cannot be used together with 'exclude'.
            exclude (str, list): Regex(es) that drop matching files from the
                index. Cannot be used together with 'include'.
            kwargs: Additional keyword arguments passed to the base Layout.

        Raises:
            ValueError: If the root directory or the mandatory
                'dataset_description.json' file is missing (when the 'bids'
                config is in use), or a mandatory field is missing from
                dataset_description.json.
        """

        self.validator = BIDSValidator(index_associated=index_associated)
        self.validate = validate

        # Determine which configs to load
        conf_path = pathjoin(dirname(abspath(__file__)), 'config', '%s.json')
        _all_doms = ['bids', 'derivatives']
        if config is None:
            config = ['bids', 'derivatives']

        configs = []

        def _load_config(conf):
            # Resolve known config names to packaged files, then load the
            # JSON. Close the handle promptly via a context manager (the
            # original json.load(open(...)) leaked the handle).
            if isinstance(conf, six.string_types):
                if conf in _all_doms:
                    conf = conf_path % conf
                with open(conf, 'r') as fobj:
                    conf = json.load(fobj)
            return conf

        for conf in listify(config):
            if isinstance(conf, tuple):
                # (config, extra): a dict updates the config in place; any
                # other value is interpreted as the config's root directory.
                _conf = _load_config(conf[0]).copy()
                if isinstance(conf[1], dict):
                    _conf.update(conf[1])
                else:
                    _conf['root'] = conf[1]
                configs.append(_conf)
            else:
                configs.append(_load_config(conf))

        # If the core 'bids' config is present, dataset_description.json is
        # mandatory; if it's absent, the user probably made a mistake, so warn.
        if any(c['name'] == 'bids' for c in configs):
            # Load and validate information in dataset_description.json
            target = pathjoin(path, 'dataset_description.json')
            if not exists(target):
                if not exists(path):
                    raise ValueError("Root directory does not exist.")
                raise ValueError(
                    "Mandatory 'dataset_description.json' file is "
                    "missing from project root!")
            with open(target, 'r') as fobj:
                self.description = json.load(fobj)

            for k in ['Name', 'BIDSVersion']:
                if k not in self.description:
                    raise ValueError("Mandatory '%s' field missing from "
                                     "dataset_description.json." % k)
        else:
            warnings.warn("The core BIDS configuration was not included in the"
                          " config list. If you override the default value for"
                          " config, you probably want to make sure 'bids' is "
                          "included in the list of values.")

        super(BIDSLayout, self).__init__(path,
                                         config=configs,
                                         dynamic_getters=True,
                                         include=include,
                                         exclude=exclude,
                                         **kwargs)
Beispiel #22
0
    def __init__(self,
                 root=None,
                 config=None,
                 index=None,
                 dynamic_getters=False,
                 absolute_paths=True,
                 regex_search=False,
                 entity_mapper=None,
                 path_patterns=None,
                 config_filename='layout.json',
                 include=None,
                 exclude=None):
        """
        A container for all the files and metadata found at the specified path.

        Args:
            root (str): Directory that all other paths will be relative to.
            Every other path the Layout sees must be at this level or below.
            config (str, list, dict): A specification of the configuration
                object(s) defining domains to use in the Layout. Can be one
                of:

                - A dictionary containing config information
                - A string giving the path to a JSON file containing the config
                - A string giving the path to a directory containing a
                  configuration file with the name defined in config_filename
                - A tuple with two elements, where the first element is one of
                  the above (i.e., dict or string), and the second element is
                  an iterable of directories to apply the config to.
                - A list, where each element is any of the above (dict, string,
                  or tuple).

            index (str): Optional path to a saved index file. If a valid value
                is passed, this index is used to populate Files and Entities,
                and the normal indexing process (which requires scanning all
                files in the project) is skipped.
            dynamic_getters (bool): If True, a get_{entity_name}() method will
                be dynamically added to the Layout every time a new Entity is
                created. This is implemented by creating a partial function of
                the get() function that sets the target argument to the
                entity name.
            absolute_paths (bool): If True, grabbit uses absolute file paths
                everywhere (including when returning query results). If False,
                the input path will determine the behavior (i.e., relative if
                a relative path was passed, absolute if an absolute path was
                passed).
            regex_search (bool): Whether to require exact matching (True)
                or regex search (False, default) when comparing the query
                string to each entity in .get() calls. This sets a default for
                the instance, but can be overridden in individual .get()
                requests.
            entity_mapper (object, str): An optional object containing methods
                for indexing specific entities. If passed, the object must
                contain a named method for every value that appears in the
                JSON config file under the "mapper" key of an Entity's entry.
                For example, if an entity "type" is defined that contains the
                key/value pair "mapper": "extract_type", then the passed object
                must contain an .extract_type() method.
                    Alternatively, the special string "self" can be passed, in
                which case the current Layout instance will be used as the
                entity mapper (implying that the user has subclassed Layout).
            path_patterns (str, list): One or more filename patterns to use
                as a default path pattern for this layout's files.  Can also
                be specified in the config file.
            config_filename (str): The name of directory-specific config files.
                Every directory will be scanned for this file, and if found,
                the config file will be read in and added to the list of
                configs.
            include (str, list): A string or list specifying regexes used to
                globally filter files when indexing. A file or directory
                *must* match at least one of the passed values in order to be
                retained in the index. Cannot be used together with 'exclude'.
            exclude (str, list): A string or list specifying regexes used to
                globally filter files when indexing. If a file or directory
                matches any of the passed values, it will be dropped from
                indexing. Cannot be used together with 'include'.

        Raises:
            ValueError: If both 'include' and 'exclude' are passed.
        """

        # include/exclude are mutually exclusive filters; fail fast.
        if include is not None and exclude is not None:
            raise ValueError("You cannot specify both the include and exclude"
                             " arguments. Please pass at most one of these.")

        self.entities = OrderedDict()
        self.files = {}
        self.mandatory = set()
        self.dynamic_getters = dynamic_getters
        self.regex_search = regex_search
        # The special string 'self' lets subclasses act as their own mapper.
        self.entity_mapper = self if entity_mapper == 'self' else entity_mapper
        self.path_patterns = path_patterns if path_patterns else []
        self.config_filename = config_filename
        self.domains = OrderedDict()
        self.include = listify(include or [])
        self.exclude = listify(exclude or [])
        self.absolute_paths = absolute_paths
        self.root = abspath(root) if absolute_paths else root

        if config is not None:
            for c in listify(config):
                # A (config, root) tuple pins the domain to a specific root.
                # Use a distinct name so we don't clobber the 'root' argument.
                if isinstance(c, tuple):
                    c, dom_root = c
                else:
                    dom_root = None
                self._load_domain(c, dom_root, True)

        # Either scan the filesystem, or restore a previously saved index.
        if index is None:
            self.index()
        else:
            self.load_index(index)
Beispiel #23
0
 def _get_files(self, root):
     ''' Walk every directory in root and return the raw os.walk results
     (pre-filtering). Extend this in subclasses as needed. '''
     walkers = (os.walk(directory, topdown=True) for directory in listify(root))
     return list(chain.from_iterable(walkers))
Beispiel #24
0
    def __init__(self,
                 root,
                 validate=True,
                 index_associated=True,
                 include=None,
                 absolute_paths=True,
                 derivatives=False,
                 config=None,
                 sources=None,
                 **kwargs):
        """
        A BIDS-aware Layout for a single BIDS project, with optional
        derivative-dataset indexing.

        Args:
            root (str): Path to the root directory of the BIDS dataset.
            validate (bool): If True, require dataset_description.json to
                exist and contain the mandatory 'Name' and 'BIDSVersion'
                fields.
            index_associated (bool): Passed to BIDSValidator; whether to
                index files associated with data files (e.g., sidecars).
            include (str, list): Subdirectory name(s) to re-include in
                indexing that would otherwise be excluded by default.
                'derivatives' must not be passed here; use the derivatives
                argument or add_derivatives() instead.
            absolute_paths (bool): Whether to report absolute file paths.
            derivatives (bool, str, list): If True, index the dataset's
                'derivatives' subdirectory; a path or list of paths indexes
                those derivative datasets instead.
            config (str, list): Name(s) of config(s) to load; defaults to
                'bids'.
            sources: Passed through listify() and stored on the instance.
            kwargs: Additional keyword arguments passed to the base Layout.

        Raises:
            ValueError: If root is not an existing directory path, if
                validation is on and dataset_description.json is missing or
                incomplete, or if 'derivatives' appears in include.
        """

        self.validator = BIDSValidator(index_associated=index_associated)
        self.validate = validate
        self.metadata_index = MetadataIndex(self)
        self.derivatives = {}
        self.sources = listify(sources)

        # Validate arguments
        if not isinstance(root, six.string_types):
            raise ValueError("root argument must be a string specifying the"
                             " directory containing the BIDS dataset.")
        if not os.path.exists(root):
            raise ValueError("BIDS root does not exist: %s" % root)

        self.root = root

        target = os.path.join(self.root, 'dataset_description.json')
        if not os.path.exists(target):
            # With validation on, a missing description file is fatal;
            # otherwise we tolerate its absence.
            if validate is True:
                raise ValueError(
                    "'dataset_description.json' is missing from project root."
                    " Every valid BIDS dataset must have this file.")
            else:
                self.description = None
        else:
            with open(target, 'r', encoding='utf-8') as desc_fd:
                self.description = json.load(desc_fd)
            if validate is True:
                for k in ['Name', 'BIDSVersion']:
                    if k not in self.description:
                        raise ValueError("Mandatory '%s' field missing from "
                                         "dataset_description.json." % k)

        # Determine which subdirectories to exclude from indexing
        excludes = {"code", "stimuli", "sourcedata", "models", "derivatives"}
        if include is not None:
            include = listify(include)
            if "derivatives" in include:
                raise ValueError("Do not pass 'derivatives' in the include "
                                 "list. To index derivatives, either set "
                                 "derivatives=True, or use add_derivatives().")
            # Re-include the named subdirectories (path separators stripped
            # so 'code/' and 'code' behave the same).
            excludes -= set([d.strip(os.path.sep) for d in include])
        self._exclude_dirs = list(excludes)

        # Set up path and config for grabbit
        if config is None:
            config = 'bids'
        config_paths = get_option('config_paths')
        path = (root, [config_paths[c] for c in listify(config)])

        # Initialize grabbit Layout
        super(BIDSLayout, self).__init__(path,
                                         root=self.root,
                                         dynamic_getters=True,
                                         absolute_paths=absolute_paths,
                                         **kwargs)

        # Add derivatives if any are found
        # NOTE(review): self.derivatives was already initialized to {} above;
        # this reassignment looks redundant — confirm nothing between the two
        # assignments populates it.
        self.derivatives = {}
        if derivatives:
            if derivatives is True:
                # Default to the conventional 'derivatives' subdirectory.
                derivatives = os.path.join(root, 'derivatives')
            self.add_derivatives(derivatives,
                                 validate=validate,
                                 index_associated=index_associated,
                                 include=include,
                                 absolute_paths=absolute_paths,
                                 derivatives=None,
                                 config=None,
                                 sources=self,
                                 **kwargs)