def __init__(self, name, config, root):
    """
    A set of rules that applies to one or more directories within a
    Layout.

    Args:
        name (str): The name of the Domain.
        config (dict): The configuration dictionary that defines the
            entities and paths for the current domain.
        root (str, list): The root directory or directories to which the
            Domain's rules apply. Can be either a single path or a list.
    """
    self.name = name
    self.config = config
    self.root = root
    self.entities = {}
    self.files = []
    self.path_patterns = []
    self.include = listify(self.config.get('include', []))
    self.exclude = listify(self.config.get('exclude', []))

    if self.include and self.exclude:
        raise ValueError("The 'include' and 'exclude' arguments cannot "
                         "both be set. Please pass at most one of these "
                         "for domain '%s'." % self.name)

    if 'default_path_patterns' in config:
        self.path_patterns += listify(config['default_path_patterns'])
def __init__(self, config):
    """
    A set of rules that applies to one or more directories within a
    Layout.

    Args:
        config (dict): The configuration dictionary that defines the
            entities and paths for the current domain. Must contain a
            'name' key giving the name of the Domain.
    """
    self.name = config['name']
    self.config = config
    self.entities = {}
    self.files = []
    self.include = listify(self.config.get('include', []))
    self.exclude = listify(self.config.get('exclude', []))

    if self.include and self.exclude:
        raise ValueError("The 'include' and 'exclude' arguments cannot "
                         "both be set. Please pass at most one of these "
                         "for domain '%s'." % self.name)

    self.path_patterns = listify(config.get('default_path_patterns', []))
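For reference, a minimal sketch of the kind of config dict this constructor expects. The domain name, entity pattern, and path pattern below are illustrative, not taken from the shipped JSON configs:

# Illustrative config for the Domain(config) signature.
domain_config = {
    'name': 'my_domain',
    'entities': [
        # One illustrative entity; real configs define many more.
        {'name': 'subject', 'pattern': 'sub-([a-zA-Z0-9]+)'},
    ],
    'default_path_patterns': ['sub-{subject}/sub-{subject}.txt'],
}

domain = Domain(domain_config)
assert domain.name == 'my_domain'
assert domain.path_patterns == ['sub-{subject}/sub-{subject}.txt']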
def _matches(self, entities=None, extensions=None, domains=None,
             regex_search=False):
    """
    Checks whether the file matches all of the passed entities and
    extensions.

    Args:
        entities (dict): A dictionary of entity names -> regex patterns.
        extensions (str, list): One or more file extensions to allow.
        domains (str, list): One or more domains the file must match.
        regex_search (bool): Whether to require exact match (False) or
            regex search (True) when comparing the query string to each
            entity.
    Returns:
        True if _all_ entities and extensions match; False otherwise.
    """
    if extensions is not None:
        if isinstance(extensions, six.string_types):
            extensions = [extensions]
        extensions = '(' + '|'.join(extensions) + ')$'
        if re.search(extensions, self.filename) is None:
            return False

    if domains is not None:
        domains = listify(domains)
        if not set(self.domains) & set(domains):
            return False

    if entities is not None:
        for name, val in entities.items():
            # The entity must be tagged on the file iff a non-None value
            # was requested for it; a None value asserts absence.
            if (name not in self.tags) ^ (val is None):
                return False
            if val is None:
                continue

            def make_patt(x):
                patt = '%s' % x
                if isinstance(x, (int, float)):
                    # allow for leading zeros if a number was specified
                    # regardless of regex_search
                    patt = '0*' + patt
                if not regex_search:
                    patt = '^%s$' % patt
                return patt

            ent_patts = [make_patt(x) for x in listify(val)]
            patt = '|'.join(ent_patts)
            if re.search(patt, str(self.tags[name].value)) is None:
                return False

    return True
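A quick sketch of how these checks combine, assuming a hypothetical File f named 'sub-01_bold.nii.gz', tagged with subject='01' (and no 'run' tag) in the 'bids' domain:

f._matches(entities={'subject': '01'}, extensions='nii.gz')  # True
f._matches(entities={'subject': '02'})        # False: value mismatch
f._matches(entities={'run': None})            # True: 'run' must be absent
f._matches(entities={'subject': '0[12]'},
           regex_search=True)                 # True: regex search
f._matches(domains='derivatives')             # False: wrong domain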
def __init__(self, paths, root=None, validate=False,
             index_associated=True, include=None, exclude=None,
             absolute_paths=True, **kwargs):
    self.validator = BIDSValidator(index_associated=index_associated)
    self.validate = validate

    # Determine which configs to load
    conf_path = pathjoin(dirname(abspath(__file__)), 'config', '%s.json')
    all_confs = ['bids', 'derivatives']

    def map_conf(x):
        if isinstance(x, six.string_types) and x in all_confs:
            return conf_path % x
        return x

    paths = listify(paths, ignore=list)
    for i, p in enumerate(paths):
        if isinstance(p, six.string_types):
            paths[i] = (p, conf_path % 'bids')
            if len(paths) == 1 and root is None:
                root = p
        elif isinstance(p, tuple):
            doms = [map_conf(d) for d in listify(p[1])]
            paths[i] = (p[0], doms)

    self.root = '/' if root is None else root

    target = pathjoin(self.root, 'dataset_description.json')
    if not exists(target):
        warnings.warn("'dataset_description.json' file is missing from "
                      "project root. You may want to set the root path to "
                      "a valid BIDS project.")
        self.description = None
    else:
        self.description = json.load(open(target, 'r'))
        for k in ['Name', 'BIDSVersion']:
            if k not in self.description:
                raise ValueError("Mandatory '%s' field missing from "
                                 "dataset_description.json." % k)

    super(BIDSLayout, self).__init__(paths, root=root,
                                     dynamic_getters=True, include=include,
                                     exclude=exclude,
                                     absolute_paths=absolute_paths,
                                     **kwargs)
def _index_file(self, root, f, domains, update_layout=True):
    # Create the file object--allows for subclassing
    f = self._make_file_object(root, f)

    for d in listify(domains):
        if d not in self.domains:
            raise ValueError("Cannot index file '%s' in domain '%s'; "
                             "no domain with that name exists."
                             % (f.path, d))
        domain = self.domains[d]

        match_vals = {}
        for e in domain.entities.values():
            m = e.match_file(f)
            if m is None and e.mandatory:
                break
            if m is not None:
                match_vals[e.name] = (e, m)

        if match_vals:
            for k, (ent, val) in match_vals.items():
                f.tags[k] = Tag(ent, val)
                if update_layout:
                    ent.add_file(f.path, val)

        if update_layout:
            domain.add_file(f)

    self.files[f.path] = f
    return f
def _load_config(self, config):
    if isinstance(config, six.string_types):
        config = json.load(open(config, 'r'))
    elif isinstance(config, list):
        # Merge a list of configs into one dict; later entries win
        merged = {}
        for c in config:
            if isinstance(c, six.string_types):
                c = json.load(open(c, 'r'))
            merged.update(c)
        config = merged

    for e in config['entities']:
        self.add_entity(**e)

    if 'index' in config:
        self.filtering_regex = config['index']
        if self.filtering_regex.get('include') and \
           self.filtering_regex.get('exclude'):
            raise ValueError("You can only define either include or "
                             "exclude regex, not both.")

    if 'default_path_patterns' in config:
        self.path_patterns += listify(config['default_path_patterns'])

    return config
def _index_file(self, root, f, domains, update_layout=True):
    # Create the file object--allows for subclassing
    f = self._make_file_object(root, f)

    for domain in listify(domains):
        domain = self.domains[domain]

        match_vals = {}
        for e in domain.entities.values():
            m = e.match_file(f)
            if m is None and e.mandatory:
                break
            if m is not None:
                match_vals[e.name] = (e, m)

        if match_vals:
            for k, (ent, val) in match_vals.items():
                f.tags[k] = Tag(ent, val)
                if update_layout:
                    ent.add_file(f.path, val)

        if update_layout:
            domain.add_file(f)

    if update_layout:
        f.domains = domains

    self.files[f.path] = f
    return f
def _check_inclusions(self, f, domains=None, fullpath=True):
    ''' Check file or directory against regexes in config to determine if
        it should be included in the index '''
    filename = f if isinstance(f, six.string_types) else f.path

    if not fullpath:
        filename = basename(filename)

    if domains is None:
        domains = list(self.domains.keys())

    domains = [self.domains[dom] for dom in listify(domains)]

    # Inject the Layout at the first position for global include/exclude
    domains.insert(0, self)

    for dom in domains:
        # If the domain defines include regexes, the file must match at
        # least one of them; otherwise it is dropped.
        if dom.include:
            for regex in dom.include:
                if re.search(regex, filename):
                    return True
            return False
        else:
            # If file matches any exclude regex, then False
            for regex in dom.exclude:
                if re.search(regex, filename, flags=re.UNICODE):
                    return False

    return True
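A sketch of the resulting precedence, assuming a Layout constructed with a global exclude and no domain-level includes (paths illustrative):

layout = Layout('/data/project', exclude=['derivatives'])
layout._check_inclusions('/data/project/derivatives/x.nii')   # False
layout._check_inclusions('/data/project/sub-01/x.nii')        # True
layout._check_inclusions('sub-01_bold.nii', fullpath=False)   # True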
def __init__(self, name, pattern=None, domain=None, mandatory=False,
             directory=None, map_func=None, dtype=None, aliases=None,
             **kwargs):
    """
    Represents a single entity defined in the JSON config.

    Args:
        name (str): The name of the entity (e.g., 'subject', 'run', etc.)
        pattern (str): A regex pattern used to match against file names.
            Must define at least one group, and only the first group is
            kept as the match.
        domain (Domain): The Domain the Entity belongs to.
        mandatory (bool): If True, every File _must_ match this entity.
        directory (str): Optional pattern defining a directory associated
            with the entity.
        map_func (callable): Optional callable used to extract the
            Entity's value from the passed string (instead of trying to
            match on the defined .pattern).
        dtype (str): The optional data type of the Entity values. Must be
            one of 'int', 'float', 'bool', or 'str'. If None, no type
            enforcement will be attempted, which means the dtype of the
            value may be unpredictable.
        aliases (str or list): Alternative names for the entity.
        kwargs (dict): Additional keyword arguments.
    """
    if pattern is None and map_func is None:
        raise ValueError("Invalid specification for Entity '%s'; no "
                         "pattern or mapping function provided. Either the"
                         " 'pattern' or the 'map_func' arguments must be "
                         "set." % name)
    self.name = name
    self.pattern = pattern
    self.domain = domain
    self.mandatory = mandatory
    self.directory = directory
    self.map_func = map_func
    self.kwargs = kwargs

    if isinstance(dtype, six.string_types):
        # Map the dtype name to the builtin type (safer than eval, and
        # unknown names fall through to the ValueError below)
        dtype = {'str': str, 'float': float, 'int': int,
                 'bool': bool}.get(dtype, dtype)
    if dtype not in [str, float, int, bool, None]:
        raise ValueError("Invalid dtype '%s'. Must be one of int, float, "
                         "bool, or str." % dtype)
    self.dtype = dtype

    self.files = {}
    self.regex = re.compile(pattern) if pattern is not None else None
    domain_name = getattr(domain, 'name', '')
    self.id = '.'.join([domain_name, name])
    aliases = [] if aliases is None else listify(aliases)
    self.aliases = ['.'.join([domain_name, alias]) for alias in aliases]
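An illustrative Entity definition (the pattern and filename are made up):

run = Entity(name='run', pattern=r'run-(\d+)', dtype='int')
m = run.regex.search('sub-01_run-02_bold.nii.gz')
assert m.group(1) == '02'  # coerced to int downstream via dtype
assert run.id == '.run'    # no Domain attached here, so the prefix is empty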
def _get_domains_for_file(self, f):
    if isinstance(f, File):
        return f.domains
    domains = []
    for d in self.domains.values():
        for path in listify(d.root):
            if f.startswith(path):
                domains.append(d.name)
                break
    return domains
def __call__(cls, path, *args, **kwargs):
    paths = listify(path)
    if len(paths) == 1:
        return super(LayoutMetaclass, cls).__call__(path, *args, **kwargs)
    layouts = []
    for p in paths:
        layout = super(LayoutMetaclass, cls).__call__(p, *args, **kwargs)
        layouts.append(layout)
    return merge_layouts(layouts)
def __init__(self, path, config=None, validate=False,
             index_associated=True, include=None, exclude=None, **kwargs):
    self.validator = BIDSValidator(index_associated=index_associated)
    self.validate = validate

    # Determine which configs to load
    conf_path = pathjoin(dirname(abspath(__file__)), 'config', '%s.json')
    _all_doms = ['bids', 'derivatives']
    if config is None:
        config = ['bids', 'derivatives']

    configs = []

    def _load_config(conf):
        if isinstance(conf, six.string_types):
            if conf in _all_doms:
                conf = conf_path % conf
            conf = json.load(open(conf, 'r'))
        return conf

    for conf in listify(config):
        if isinstance(conf, tuple):
            _conf = _load_config(conf[0]).copy()
            if isinstance(conf[1], dict):
                _conf.update(conf[1])
            else:
                _conf['root'] = conf[1]
            configs.append(_conf)
        else:
            configs.append(_load_config(conf))

    # If 'bids' isn't in the list, the user probably made a mistake...
    if not any([c['name'] == 'bids' for c in configs]):
        warnings.warn("The core BIDS configuration was not included in the"
                      " config list. If you override the default value for"
                      " config, you probably want to make sure 'bids' is "
                      "included in the list of values.")

    super(BIDSLayout, self).__init__(path, config=configs,
                                     dynamic_getters=True, include=include,
                                     exclude=exclude, **kwargs)
def add_derivatives(self, path, **kwargs):
    ''' Add BIDS-Derivatives datasets to tracking.

    Args:
        path (str, list): One or more paths to BIDS-Derivatives datasets.
            Each path can point to either a derivatives/ directory
            containing one or more pipeline directories, or to a single
            pipeline directory (e.g., derivatives/fmriprep).
        kwargs (dict): Optional keyword arguments to pass on to
            BIDSLayout() when initializing each of the derivative
            datasets.
    '''
    paths = listify(path)
    deriv_dirs = []

    # Collect all paths that contain a dataset_description.json
    def check_for_description(directory):
        dd = os.path.join(directory, 'dataset_description.json')
        return os.path.exists(dd)

    for p in paths:
        p = os.path.abspath(p)
        if check_for_description(p):
            deriv_dirs.append(p)
        else:
            subdirs = [d for d in os.listdir(p)
                       if os.path.isdir(os.path.join(p, d))]
            for sd in subdirs:
                sd = os.path.join(p, sd)
                if check_for_description(sd):
                    deriv_dirs.append(sd)

    for deriv in deriv_dirs:
        dd = os.path.join(deriv, 'dataset_description.json')
        description = json.load(open(dd, 'r'))
        pipeline_name = description.get(
            'PipelineDescription', {}).get('Name')
        if pipeline_name is None:
            raise ValueError("Every valid BIDS-derivatives dataset must "
                             "have a PipelineDescription.Name field set "
                             "inside dataset_description.json.")
        if pipeline_name in self.derivatives:
            raise ValueError("Pipeline name '%s' has already been added "
                             "to this BIDSLayout. Every added pipeline "
                             "must have a unique name!" % pipeline_name)
        # Default config and sources values
        kwargs['config'] = kwargs.get('config') or ['bids', 'derivatives']
        kwargs['sources'] = kwargs.get('sources') or self
        self.derivatives[pipeline_name] = BIDSLayout(deriv, **kwargs)
def _get_fieldmaps(self, path):
    sub = os.path.split(path)[1].split("_")[0].split("sub-")[1]
    fieldmap_set = []
    suffix = '(phase1|phasediff|epi|fieldmap)'
    files = self.get(subject=sub, suffix=suffix,
                     extensions=['nii.gz', 'nii'])
    for file in files:
        metadata = self.get_metadata(file.path)
        if metadata and "IntendedFor" in metadata.keys():
            intended_for = listify(metadata["IntendedFor"])
            if any([path.endswith(_suff) for _suff in intended_for]):
                cur_fieldmap = {}
                if file.suffix == "phasediff":
                    cur_fieldmap = {"phasediff": file.path,
                                    "magnitude1": file.path.replace(
                                        "phasediff", "magnitude1"),
                                    "suffix": "phasediff"}
                    magnitude2 = file.path.replace("phasediff",
                                                   "magnitude2")
                    if os.path.isfile(magnitude2):
                        cur_fieldmap['magnitude2'] = magnitude2
                elif file.suffix == "phase1":
                    cur_fieldmap["phase1"] = file.path
                    cur_fieldmap["magnitude1"] = \
                        file.path.replace("phase1", "magnitude1")
                    cur_fieldmap["phase2"] = \
                        file.path.replace("phase1", "phase2")
                    cur_fieldmap["magnitude2"] = \
                        file.path.replace("phase1", "magnitude2")
                    cur_fieldmap["suffix"] = "phase"
                elif file.suffix == "epi":
                    cur_fieldmap["epi"] = file.path
                    cur_fieldmap["suffix"] = "epi"
                elif file.suffix == "fieldmap":
                    cur_fieldmap["fieldmap"] = file.path
                    cur_fieldmap["magnitude"] = \
                        file.path.replace("fieldmap", "magnitude")
                    cur_fieldmap["suffix"] = "fieldmap"
                fieldmap_set.append(cur_fieldmap)
    return fieldmap_set
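The return value is a list of dicts keyed by fieldmap type. For a phasediff acquisition intended for the queried scan, it would look roughly like this (paths illustrative):

# Illustrative output for one phasediff fieldmap:
[{'phasediff': '/data/bids/sub-01/fmap/sub-01_phasediff.nii.gz',
  'magnitude1': '/data/bids/sub-01/fmap/sub-01_magnitude1.nii.gz',
  'magnitude2': '/data/bids/sub-01/fmap/sub-01_magnitude2.nii.gz',
  'suffix': 'phasediff'}]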
def _load_domain(self, config, root=None, from_init=False):
    if isinstance(config, six.string_types):
        if isdir(config):
            config = join(config, self.config_filename)
        if not exists(config):
            raise ValueError("Config file '%s' cannot be found." % config)
        config_filename = config
        config = json.load(open(config, 'r'))
        if root is None and not from_init:
            root = dirname(abspath(config_filename))

    if 'name' not in config:
        raise ValueError("Config file missing 'name' attribute.")

    if config['name'] in self.domains:
        raise ValueError("Config with name '%s' already exists in "
                         "Layout. Name of each config file must be "
                         "unique across entire Layout." % config['name'])

    if root is None and from_init:
        # No valid root found for this domain; fall back on the root
        # directory for the Layout itself.
        root = self.root

    if config.get('root') in [None, '.']:
        config['root'] = root

    for r in listify(config['root']):
        if not exists(r):
            raise ValueError("Root directory %s for domain %s does not "
                             "exist!" % (r, config['name']))

    # Load entities
    domain = Domain(config['name'], config)
    for e in config.get('entities', []):
        self.add_entity(domain=domain, **e)

    self.domains[domain.name] = domain
    return domain
def build_path(self, source, path_patterns=None, strict=False,
               domains=None):
    ''' Constructs a target filename for a file or dictionary of entities.

    Args:
        source (str, File, dict): The source data to use to construct the
            new file path. Must be one of:
            - A File object
            - A string giving the path of a File contained within the
              current Layout.
            - A dict of entities, with entity names in keys and values in
              values
        path_patterns (list): Optional path patterns to use to construct
            the new file path. If None, the Layout-defined patterns will
            be used.
        strict (bool): If True, all entities must be matched inside a
            pattern in order to be a valid match. If False, extra
            entities will be ignored so long as all mandatory entities
            are found.
        domains (str, list): Optional name(s) of domain(s) to scan for
            path patterns. If None, all domains are scanned. If two or
            more domains are provided, the order determines the
            precedence of path patterns (i.e., earlier domains will have
            higher precedence).
    '''
    if isinstance(source, six.string_types):
        if source not in self.files:
            source = join(self.root, source)
        source = self.get_file(source)

    if isinstance(source, File):
        source = source.entities

    if path_patterns is None:
        if domains is None:
            domains = list(self.domains.keys())
        path_patterns = []
        for dom in listify(domains):
            path_patterns.extend(self.domains[dom].path_patterns)

    return build_path(source, path_patterns, strict)
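A sketch of constructing a path from a dict of entities with an explicit pattern, assuming an existing Layout instance named layout (the entity values are illustrative; {name} placeholders are filled from the source entities):

entities = {'subject': '01', 'task': 'rest', 'suffix': 'bold'}
pattern = 'sub-{subject}/func/sub-{subject}_task-{task}_{suffix}.nii.gz'
layout.build_path(entities, path_patterns=[pattern])
# -> 'sub-01/func/sub-01_task-rest_bold.nii.gz'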
def add_derivatives(self, path, **kwargs):
    ''' Add BIDS-Derivatives datasets to tracking.

    Args:
        path (str, list): One or more paths to BIDS-Derivatives datasets.
            Each path can point to either a derivatives/ directory
            containing one or more pipeline directories, or to a single
            pipeline directory (e.g., derivatives/fmriprep).
        kwargs (dict): Optional keyword arguments to pass on to
            BIDSLayout() when initializing each of the derivative
            datasets.
    '''
    paths = listify(path)
    deriv_dirs = []

    # Collect all paths that contain a dataset_description.json
    def check_for_description(directory):
        dd = os.path.join(directory, 'dataset_description.json')
        return os.path.exists(dd)

    for p in paths:
        p = os.path.abspath(p)
        if os.path.exists(p):
            if check_for_description(p):
                deriv_dirs.append(p)
            else:
                subdirs = [d for d in os.listdir(p)
                           if os.path.isdir(os.path.join(p, d))]
                for sd in subdirs:
                    sd = os.path.join(p, sd)
                    if check_for_description(sd):
                        deriv_dirs.append(sd)

    local_entities = set(ent.name for ent in self.entities.values())

    for deriv in deriv_dirs:
        dd = os.path.join(deriv, 'dataset_description.json')
        with open(dd, 'r', encoding='utf-8') as ddfd:
            description = json.load(ddfd)
        pipeline_name = description.get(
            'PipelineDescription', {}).get('Name')
        if pipeline_name is None:
            raise ValueError("Every valid BIDS-derivatives dataset must "
                             "have a PipelineDescription.Name field set "
                             "inside dataset_description.json.")
        if pipeline_name in self.derivatives:
            raise ValueError("Pipeline name '%s' has already been added "
                             "to this BIDSLayout. Every added pipeline "
                             "must have a unique name!" % pipeline_name)
        # Default config and sources values
        kwargs['config'] = kwargs.get('config') or ['bids', 'derivatives']
        kwargs['sources'] = kwargs.get('sources') or self
        self.derivatives[pipeline_name] = BIDSLayout(deriv, **kwargs)

        # Propagate derivative entities into top-level dynamic getters
        deriv_entities = set(
            ent.name
            for ent in self.derivatives[pipeline_name].entities.values())
        for deriv_ent in deriv_entities - local_entities:
            local_entities.add(deriv_ent)
            getter = 'get_' + inflect.engine().plural(deriv_ent)
            if not hasattr(self, getter):
                func = partial(self.get, target=deriv_ent,
                               return_type='id')
                setattr(self, getter, func)
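A usage sketch (paths illustrative; the pipeline key comes from each dataset's PipelineDescription.Name):

layout = BIDSLayout('/data/bids')
layout.add_derivatives('/data/bids/derivatives/fmriprep')
print(list(layout.derivatives))  # e.g., ['fmriprep']
# Entities defined only in the derivatives config now get dynamic
# get_*() methods on the parent layout as well.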
def get(self, return_type='object', target=None, extensions=None,
        derivatives=True, regex_search=None, defined_fields=None,
        domains=None, **kwargs):
    """
    Retrieve files and/or metadata from the current Layout.

    Args:
        return_type (str): Type of result to return. Valid values:
            'object' (default): return a list of matching BIDSFile
                objects.
            'file': return a list of matching filenames.
            'dir': return a list of directories.
            'id': return a list of unique IDs. Must be used together
                with a valid target.
        target (str): Optional name of the target entity to get results
            for (only used if return_type is 'dir' or 'id').
        extensions (str, list): One or more file extensions to filter on.
            Files with any other extensions will be excluded.
        derivatives (bool, str, list): Whether/how to search associated
            BIDS-Derivatives datasets. If True (default), all available
            derivatives are searched. If a str or list, must be the
            name(s) of the derivatives to search (as defined in the
            PipelineDescription.Name field in dataset_description.json).
        regex_search (bool or None): Whether to require exact matching
            (False) or regex search (True) when comparing the query
            string to each entity. If None (default), uses the value
            found in self.
        defined_fields (list): Optional list of names of metadata fields
            that must be defined in JSON sidecars in order to consider
            the file a match, but which don't need to match any
            particular value.
        domains (str, list): Domain(s) to search in. Valid values are
            'bids' and 'derivatives'.
        kwargs (dict): Any optional key/values to filter the entities on.
            Keys are entity names, values are regexes to filter on. For
            example, passing subject='0[12]' would return only files
            whose subject label matches '01' or '02'.

    Returns:
        A list of BIDSFile (default) or other objects (see return_type
        for details).
    """
    # Error out for users still expecting 0.6 behavior
    if 'type' in kwargs:
        raise ValueError("As of pybids 0.7.0, the 'type' argument has "
                         "been replaced with 'suffix'.")

    if derivatives is True:
        derivatives = list(self.derivatives.keys())
    elif derivatives:
        derivatives = listify(derivatives)

    # Separate entity kwargs from metadata kwargs
    ent_kwargs, md_kwargs = {}, {}
    all_ents = self.get_domain_entities()
    if derivatives:
        for deriv in derivatives:
            deriv_ents = self.derivatives[deriv].get_domain_entities()
            all_ents.update(deriv_ents)

    for k, v in kwargs.items():
        if k in all_ents:
            ent_kwargs[k] = v
        else:
            md_kwargs[k] = v

    # Provide some suggestions if target is specified and invalid.
    if target is not None and target not in all_ents:
        import difflib
        potential = list(all_ents.keys())
        suggestions = difflib.get_close_matches(target, potential)
        if suggestions:
            message = "Did you mean one of: {}?".format(suggestions)
        else:
            message = "Valid targets are: {}".format(potential)
        raise ValueError(("Unknown target '{}'. " + message)
                         .format(target))

    all_results = []

    # Get entity-based search results using the superclass's get()
    result = super(BIDSLayout, self).get(return_type, target=target,
                                         extensions=extensions,
                                         domains=None,
                                         regex_search=regex_search,
                                         **ent_kwargs)

    # Search the metadata if needed
    if return_type not in {'dir', 'id'}:
        if md_kwargs:
            if return_type.startswith('obj'):
                result = [f.path for f in result]
            result = self.metadata_index.search(result, defined_fields,
                                                **md_kwargs)
            if return_type.startswith('obj'):
                result = [self.files[f] for f in result]

    all_results.append(result)

    # Add results from derivatives
    if derivatives:
        for deriv in derivatives:
            deriv = self.derivatives[deriv]
            deriv_res = deriv.get(return_type, target, extensions, None,
                                  regex_search, **ent_kwargs)
            all_results.append(deriv_res)

    # Flatten results
    result = list(chain(*all_results))

    if return_type in ['dir', 'id']:
        result = list(set(result))

    return result
def __init__(self, paths, root=None, index=None, dynamic_getters=False,
             absolute_paths=True, regex_search=False, entity_mapper=None,
             path_patterns=None, config_filename='layout.json',
             include=None, exclude=None):
    """
    A container for all the files and metadata found at the specified
    path.

    Args:
        paths (str, list): The path(s) where project files are located.
            Must be one of:

            - A path to a directory containing files to index
            - A list of paths to directories to index
            - A list of 2-tuples where each tuple encodes a mapping from
              directories to domains. The first element is a string or
              list giving the paths to one or more directories to index.
              The second element specifies which domains to apply to the
              specified files, and can be one of:
                * A string giving the path to a JSON config file
                * A dictionary containing config information
                * A list of any combination of strings or dicts
        root (str): Optional directory that all other paths will be
            relative to. If set, every other path the Layout sees must be
            at this level or below. If None, the filesystem root ('/') is
            used.
        index (str): Optional path to a saved index file. If a valid
            value is passed, this index is used to populate Files and
            Entities, and the normal indexing process (which requires
            scanning all files in the project) is skipped.
        dynamic_getters (bool): If True, a get_{entity_name}() method
            will be dynamically added to the Layout every time a new
            Entity is created. This is implemented by creating a partial
            function of the get() function that sets the target argument
            to the entity name.
        absolute_paths (bool): If True, grabbit uses absolute file paths
            everywhere (including when returning query results). If
            False, the input path will determine the behavior (i.e.,
            relative if a relative path was passed, absolute if an
            absolute path was passed).
        regex_search (bool): Whether to require exact matching (False,
            default) or regex search (True) when comparing the query
            string to each entity in .get() calls. This sets a default
            for the instance, but can be overridden in individual .get()
            requests.
        entity_mapper (object, str): An optional object containing
            methods for indexing specific entities. If passed, the object
            must contain a named method for every value that appears in
            the JSON config file under the "mapper" key of an Entity's
            entry. For example, if an entity "type" is defined that
            contains the key/value pair "mapper": "extract_type", then
            the passed object must contain an .extract_type() method.
            Alternatively, the special string "self" can be passed, in
            which case the current Layout instance will be used as the
            entity mapper (implying that the user has subclassed Layout).
        path_patterns (str, list): One or more filename patterns to use
            as a default path pattern for this layout's files. Can also
            be specified in the config file.
        config_filename (str): The name of directory-specific config
            files. Every directory will be scanned for this file, and if
            found, the config file will be read in and added to the list
            of configs.
        include (str, list): A string or list specifying regexes used to
            globally filter files when indexing. A file or directory
            *must* match at least one of the passed values in order to be
            retained in the index. Cannot be used together with
            'exclude'.
        exclude (str, list): A string or list specifying regexes used to
            globally filter files when indexing. If a file or directory
            matches any of the passed values, it will be dropped from
            indexing. Cannot be used together with 'include'.
    """
    if include is not None and exclude is not None:
        raise ValueError("You cannot specify both the include and exclude"
                         " arguments. Please pass at most one of these.")

    self.entities = OrderedDict()
    self.files = {}
    self.mandatory = set()
    self.dynamic_getters = dynamic_getters
    self.regex_search = regex_search
    self.entity_mapper = self if entity_mapper == 'self' else entity_mapper
    self.path_patterns = path_patterns if path_patterns else []
    self.config_filename = config_filename
    self.domains = OrderedDict()
    self.include = listify(include or [])
    self.exclude = listify(exclude or [])
    self.absolute_paths = absolute_paths

    if root is None:
        root = '/'
    self.root = abspath(root)

    self._domain_map = {}

    # Extract path --> domain mapping
    self._paths_to_index = {}

    def add_path(path, val):
        path = abspath(path)
        self._paths_to_index[path] = val

    for p in listify(paths, ignore=list):
        if isinstance(p, six.string_types):
            add_path(p, [])
        else:
            doms = listify(p[1])
            doms = [self._get_or_load_domain(d) for d in doms]
            for elem in listify(p[0]):
                add_path(elem, doms)

    # Verify existence of all paths
    for p in self._paths_to_index:
        if not exists(p):
            raise ValueError("Search path {} doesn't exist.".format(p))

    if index is None:
        self.index()
    else:
        self.load_index(index)
def __init__(self, paths, root=None, validate=False,
             index_associated=True, include=None, exclude=None,
             absolute_paths=True, **kwargs):
    self.validator = BIDSValidator(index_associated=index_associated)
    self.validate = validate

    # Determine which configs to load
    conf_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'config', '%s.json')
    all_confs = ['bids', 'derivatives']

    def map_conf(x):
        if isinstance(x, six.string_types) and x in all_confs:
            return conf_path % x
        return x

    paths = listify(paths, ignore=list)
    for i, p in enumerate(paths):
        if isinstance(p, six.string_types):
            paths[i] = (p, conf_path % 'bids')
        elif isinstance(p, tuple):
            doms = [map_conf(d) for d in listify(p[1])]
            paths[i] = (p[0], doms)

    # Set root to longest valid common parent if it isn't explicitly set
    if root is None:
        abs_paths = [os.path.abspath(p[0]) for p in paths]
        root = commonpath(abs_paths)
        if not root:
            raise ValueError("One or more invalid paths passed; could not "
                             "find a common parent directory of %s. Either"
                             " make sure the paths are correct, or "
                             "explicitly set the root using the 'root' "
                             "argument." % abs_paths)
    self.root = root

    target = os.path.join(self.root, 'dataset_description.json')
    if not os.path.exists(target):
        warnings.warn("'dataset_description.json' file is missing from "
                      "project root. You may want to set the root path to "
                      "a valid BIDS project.")
        self.description = None
    else:
        self.description = json.load(open(target, 'r'))
        for k in ['Name', 'BIDSVersion']:
            if k not in self.description:
                raise ValueError("Mandatory '%s' field missing from "
                                 "dataset_description.json." % k)

    super(BIDSLayout, self).__init__(paths, root=root,
                                     dynamic_getters=True, include=include,
                                     exclude=exclude,
                                     absolute_paths=absolute_paths,
                                     **kwargs)
def __init__(self, path, config=None, validate=False,
             index_associated=True, include=None, exclude=None, **kwargs):
    self.validator = BIDSValidator(index_associated=index_associated)
    self.validate = validate

    # Determine which configs to load
    conf_path = pathjoin(dirname(abspath(__file__)), 'config', '%s.json')
    _all_doms = ['bids', 'derivatives']
    if config is None:
        config = ['bids', 'derivatives']

    configs = []

    def _load_config(conf):
        if isinstance(conf, six.string_types):
            if conf in _all_doms:
                conf = conf_path % conf
            conf = json.load(open(conf, 'r'))
        return conf

    for conf in listify(config):
        if isinstance(conf, tuple):
            _conf = _load_config(conf[0]).copy()
            if isinstance(conf[1], dict):
                _conf.update(conf[1])
            else:
                _conf['root'] = conf[1]
            configs.append(_conf)
        else:
            configs.append(_load_config(conf))

    if any([c['name'] == 'bids' for c in configs]):
        # Load and validate information in dataset_description.json
        target = pathjoin(path, 'dataset_description.json')
        if not exists(target):
            if not exists(path):
                raise ValueError("Root directory does not exist.")
            raise ValueError("Mandatory 'dataset_description.json' file "
                             "is missing from project root!")
        self.description = json.load(open(target, 'r'))
        for k in ['Name', 'BIDSVersion']:
            if k not in self.description:
                raise ValueError("Mandatory '%s' field missing from "
                                 "dataset_description.json." % k)
    else:
        # If 'bids' isn't in the list, the user probably made a mistake...
        warnings.warn("The core BIDS configuration was not included in the"
                      " config list. If you override the default value for"
                      " config, you probably want to make sure 'bids' is "
                      "included in the list of values.")

    super(BIDSLayout, self).__init__(path, config=configs,
                                     dynamic_getters=True, include=include,
                                     exclude=exclude, **kwargs)
def __init__(self, root=None, config=None, index=None,
             dynamic_getters=False, absolute_paths=True,
             regex_search=False, entity_mapper=None, path_patterns=None,
             config_filename='layout.json', include=None, exclude=None):
    """
    A container for all the files and metadata found at the specified
    path.

    Args:
        root (str): Directory that all other paths will be relative to.
            Every other path the Layout sees must be at this level or
            below.
        config (str, list, dict, tuple): A specification of the
            configuration object(s) defining domains to use in the
            Layout. Can be one of:

            - A dictionary containing config information
            - A string giving the path to a JSON file containing the
              config
            - A string giving the path to a directory containing a
              configuration file with the name defined in config_filename
            - A tuple with two elements, where the first element is one
              of the above (i.e., dict or string), and the second element
              is an iterable of directories to apply the config to.
            - A list, where each element is any of the above (dict,
              string, or tuple).
        index (str): Optional path to a saved index file. If a valid
            value is passed, this index is used to populate Files and
            Entities, and the normal indexing process (which requires
            scanning all files in the project) is skipped.
        dynamic_getters (bool): If True, a get_{entity_name}() method
            will be dynamically added to the Layout every time a new
            Entity is created. This is implemented by creating a partial
            function of the get() function that sets the target argument
            to the entity name.
        absolute_paths (bool): If True, grabbit uses absolute file paths
            everywhere (including when returning query results). If
            False, the input path will determine the behavior (i.e.,
            relative if a relative path was passed, absolute if an
            absolute path was passed).
        regex_search (bool): Whether to require exact matching (False,
            default) or regex search (True) when comparing the query
            string to each entity in .get() calls. This sets a default
            for the instance, but can be overridden in individual .get()
            requests.
        entity_mapper (object, str): An optional object containing
            methods for indexing specific entities. If passed, the object
            must contain a named method for every value that appears in
            the JSON config file under the "mapper" key of an Entity's
            entry. For example, if an entity "type" is defined that
            contains the key/value pair "mapper": "extract_type", then
            the passed object must contain an .extract_type() method.
            Alternatively, the special string "self" can be passed, in
            which case the current Layout instance will be used as the
            entity mapper (implying that the user has subclassed Layout).
        path_patterns (str, list): One or more filename patterns to use
            as a default path pattern for this layout's files. Can also
            be specified in the config file.
        config_filename (str): The name of directory-specific config
            files. Every directory will be scanned for this file, and if
            found, the config file will be read in and added to the list
            of configs.
        include (str, list): A string or list specifying regexes used to
            globally filter files when indexing. A file or directory
            *must* match at least one of the passed values in order to be
            retained in the index. Cannot be used together with
            'exclude'.
        exclude (str, list): A string or list specifying regexes used to
            globally filter files when indexing. If a file or directory
            matches any of the passed values, it will be dropped from
            indexing. Cannot be used together with 'include'.
    """
    if include is not None and exclude is not None:
        raise ValueError("You cannot specify both the include and exclude"
                         " arguments. Please pass at most one of these.")

    self.entities = OrderedDict()
    self.files = {}
    self.mandatory = set()
    self.dynamic_getters = dynamic_getters
    self.regex_search = regex_search
    self.entity_mapper = self if entity_mapper == 'self' else entity_mapper
    self.path_patterns = path_patterns if path_patterns else []
    self.config_filename = config_filename
    self.domains = OrderedDict()
    self.include = listify(include or [])
    self.exclude = listify(exclude or [])
    self.absolute_paths = absolute_paths
    self.root = abspath(root) if absolute_paths else root

    if config is not None:
        for c in listify(config):
            if isinstance(c, tuple):
                c, root = c
            else:
                root = None
            self._load_domain(c, root, True)

    if index is None:
        self.index()
    else:
        self.load_index(index)
def _get_files(self, root):
    ''' Returns all files in project (pre-filtering). Extend this in
        subclasses as needed. '''
    results = [os.walk(r, topdown=True) for r in listify(root)]
    return list(chain(*results))
def __init__(self, root, validate=True, index_associated=True,
             include=None, absolute_paths=True, derivatives=False,
             config=None, sources=None, **kwargs):
    self.validator = BIDSValidator(index_associated=index_associated)
    self.validate = validate
    self.metadata_index = MetadataIndex(self)
    self.derivatives = {}
    self.sources = listify(sources)

    # Validate arguments
    if not isinstance(root, six.string_types):
        raise ValueError("root argument must be a string specifying the"
                         " directory containing the BIDS dataset.")
    if not os.path.exists(root):
        raise ValueError("BIDS root does not exist: %s" % root)

    self.root = root

    target = os.path.join(self.root, 'dataset_description.json')
    if not os.path.exists(target):
        if validate is True:
            raise ValueError(
                "'dataset_description.json' is missing from project root."
                " Every valid BIDS dataset must have this file.")
        else:
            self.description = None
    else:
        with open(target, 'r', encoding='utf-8') as desc_fd:
            self.description = json.load(desc_fd)
        if validate is True:
            for k in ['Name', 'BIDSVersion']:
                if k not in self.description:
                    raise ValueError("Mandatory '%s' field missing from "
                                     "dataset_description.json." % k)

    # Determine which subdirectories to exclude from indexing
    excludes = {"code", "stimuli", "sourcedata", "models", "derivatives"}
    if include is not None:
        include = listify(include)
        if "derivatives" in include:
            raise ValueError("Do not pass 'derivatives' in the include "
                             "list. To index derivatives, either set "
                             "derivatives=True, or use add_derivatives().")
        excludes -= set([d.strip(os.path.sep) for d in include])
    self._exclude_dirs = list(excludes)

    # Set up path and config for grabbit
    if config is None:
        config = 'bids'
    config_paths = get_option('config_paths')
    path = (root, [config_paths[c] for c in listify(config)])

    # Initialize grabbit Layout
    super(BIDSLayout, self).__init__(path, root=self.root,
                                     dynamic_getters=True,
                                     absolute_paths=absolute_paths,
                                     **kwargs)

    # Add derivatives if any are found
    if derivatives:
        if derivatives is True:
            derivatives = os.path.join(root, 'derivatives')
        self.add_derivatives(derivatives, validate=validate,
                             index_associated=index_associated,
                             include=include,
                             absolute_paths=absolute_paths,
                             derivatives=None, config=None, sources=self,
                             **kwargs)
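A sketch of typical initialization of this final signature (path illustrative; derivatives=True scans <root>/derivatives for pipeline directories, keyed by each dataset's PipelineDescription.Name):

layout = BIDSLayout('/data/bids', validate=True, derivatives=True)
layout.get(subject='01', suffix='T1w')             # query the main dataset
layout.derivatives['fmriprep'].get(suffix='bold')  # query one pipeline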