def test_parse_file_entities():
    """Entity extraction from a filename under several config setups."""
    filename = '/sub-03_ses-07_run-4_desc-bleargh_sekret.nii.gz'

    # Entities recognized by the bids config alone
    expected = {
        'subject': '03',
        'session': '07',
        'run': 4,
        'suffix': 'sekret',
        'extension': 'nii.gz',
    }
    assert parse_file_entities(filename, config='bids') == expected
    bids_config = Config.load('bids')
    assert parse_file_entities(filename, config=[bids_config]) == expected

    # Entities recognized by the combined bids + derivatives configs
    expected['desc'] = 'bleargh'
    assert parse_file_entities(filename) == expected
    assert parse_file_entities(
        filename, config=['bids', 'derivatives']) == expected

    # An explicit list of Entity objects; session is deliberately left
    # out so the result differs from the previous expectation
    custom_entities = [
        Entity('subject', "[/\\\\]sub-([a-zA-Z0-9]+)"),
        Entity('run', "[_/\\\\]run-0*(\\d+)", dtype=int),
        Entity('suffix', "[._]*([a-zA-Z0-9]*?)\\.[^/\\\\]+$"),
        Entity('desc', "desc-([a-zA-Z0-9]+)"),
    ]
    expected = {'subject': '03', 'run': 4, 'suffix': 'sekret',
                'desc': 'bleargh'}
    assert parse_file_entities(filename, entities=custom_entities) == expected
def test_entity_matches(tmpdir):
    """Entity.match_file should return the captured group from a filename."""
    filename = "aardvark-4-reporting-for-duty.txt"
    tmpdir.mkdir("tmp").join(filename).write("###")
    target = BIDSFile(os.path.join(str(tmpdir), filename))
    ent = Entity('avaricious', r'aardvark-(\d+)')
    assert ent.match_file(target) == '4'
def test_entity_initialization():
    """A freshly constructed Entity carries the expected defaults."""
    ent = Entity('avaricious', r'aardvark-(\d+)')
    # Constructor arguments are stored verbatim
    assert ent.name == 'avaricious'
    assert ent.pattern == r'aardvark-(\d+)'
    # Optional attributes default to falsy/empty values
    assert not ent.mandatory
    assert ent.directory is None
    assert ent.files == {}
def test_entity_add_file(sample_bidsfile):
    """Committing a Tag should expose the file's value via Entity.files."""
    session = create_session()
    bf = sample_bidsfile
    entity = Entity('prop', r'-(\d+)')
    tag = Tag(file=bf, entity=entity, value=4)
    session.add_all([tag, entity, bf])
    session.commit()
    # After commit, the entity maps the file path to the tagged value
    assert entity.files[bf.path] == 4
def writable_file(tmpdir):
    """Create a tagged BIDSFile backed by a fresh in-memory database.

    Sets up a throwaway SQLite engine/session, writes a dummy BIDS-named
    file under a tmp subdirectory, attaches subject/task/run tags, commits
    everything, and returns the BIDSFile.

    Parameters
    ----------
    tmpdir : py.path.local
        pytest's tmpdir fixture; the file is created beneath it.

    Returns
    -------
    BIDSFile
        The committed file object with its tags in place.
    """
    # A fresh in-memory database so tests never share state
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    testfile = 'sub-03_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz'
    fn = tmpdir.mkdir("tmp").join(testfile)
    fn.write('###')
    # NOTE: the original wrapped this in a single-argument os.path.join,
    # which is a no-op; str() on the py.path object is sufficient.
    bf = BIDSFile(str(fn))

    tag_dict = {'task': 'rest', 'run': 2, 'subject': '3'}
    ents = {name: Entity(name) for name in tag_dict.keys()}
    tags = [Tag(bf, ents[k], value=v) for k, v in tag_dict.items()]

    session.add_all(list(ents.values()) + tags + [bf])
    session.commit()
    return bf
def test_entity_init_with_bad_dtype():
    """Constructing an Entity with an unknown dtype must raise ValueError."""
    with pytest.raises(ValueError) as exc:
        Entity('test', dtype='superfloat')
    # BUG FIX: Python 3 exceptions have no `.message` attribute (and pytest's
    # ExceptionInfo never had one) -- the original `exc.value.message` would
    # itself raise AttributeError. Use str(exc.value) to read the text.
    assert str(exc.value).startswith("Invalid dtype")
def subject_entity():
    """Return a prototypical, non-mandatory 'subject' Entity for tests."""
    pattern = r"[/\\\\]sub-([a-zA-Z0-9]+)"
    ent = Entity('subject', pattern, mandatory=False,
                 directory="{subject}", dtype='str')
    return ent
def index_metadata(self, **filters):
    """Index JSON sidecar metadata for all files in the BIDS dataset.

    Walks every file matched by ``filters``, resolves the JSON sidecars
    each file inherits from (per the BIDS inheritance principle), merges
    their payloads, stores the results as Tag records, and creates
    FileAssociation rows for metadata, inheritance, and IntendedFor
    relationships.

    Parameters
    ----------
    filters : dict
        Keyword arguments forwarded to ``self.layout.get`` to restrict
        which files are indexed.

    Raises
    ------
    IOError
        If a JSON sidecar cannot be decoded.
    ValueError
        If a JSON sidecar value conflicts with the value already
        extracted from the filename for the same entity.
    """
    # Process JSON files first if we're indexing metadata
    all_files = self.layout.get(absolute_paths=True, **filters)

    # Track ALL entities we've seen in file names or metadatas
    all_entities = {}
    for c in self.config:
        all_entities.update(c.entities)

    # If key/value pairs in JSON files duplicate ones extracted from
    # filenames, we can end up with Tag collisions in the DB. To prevent
    # this, store every existing (file, entity) -> value mapping and check
    # against it before adding each new Tag.
    all_tags = {}
    for t in self.session.query(Tag).all():
        key = '{}_{}'.format(t.file_path, t.entity_name)
        all_tags[key] = str(t.value)

    # Store of per-file data built as we iterate. Shape:
    #   { "extension/suffix": { dirname: [(entities, payload, path)] } }
    # The payload is None for non-JSON files.
    file_data = {}

    for bf in all_files:
        file_ents = bf.entities.copy()
        # suffix/extension index the store; remaining entities are used
        # later for inheritance matching
        suffix = file_ents.pop('suffix', None)
        ext = file_ents.pop('extension', None)

        if suffix is not None and ext is not None:
            key = "{}/{}".format(ext, suffix)
            if key not in file_data:
                file_data[key] = defaultdict(list)

            if ext == 'json':
                with open(bf.path, 'r') as handle:
                    try:
                        payload = json.load(handle)
                    except json.JSONDecodeError as e:
                        msg = ("Error occurred while trying to decode JSON"
                               " from file '{}'.".format(bf.path))
                        # Re-raise with the offending path; chain preserves
                        # the original decode error for debugging
                        raise IOError(msg) from e
            else:
                payload = None

            to_store = (file_ents, payload, bf.path)
            file_data[key][bf.dirname].append(to_store)

    # To avoid integrity errors, track primary keys we've seen
    seen_assocs = set()

    def create_association_pair(src, dst, kind, kind2=None):
        # Add a forward and a reverse FileAssociation (e.g. Child/Parent),
        # skipping any pair already staged in this session.
        kind2 = kind2 or kind
        pk1 = '#'.join([src, dst, kind])
        if pk1 not in seen_assocs:
            self.session.add(FileAssociation(src=src, dst=dst, kind=kind))
            seen_assocs.add(pk1)
        pk2 = '#'.join([dst, src, kind2])
        if pk2 not in seen_assocs:
            self.session.add(FileAssociation(src=dst, dst=src, kind=kind2))
            seen_assocs.add(pk2)

    # TODO: Efficiency of everything in this loop could be improved
    filenames = [bf for bf in all_files if not bf.path.endswith('.json')]

    for bf in filenames:
        file_ents = bf.entities.copy()
        suffix = file_ents.pop('suffix', None)
        ext = file_ents.pop('extension', None)
        file_ent_keys = set(file_ents.keys())

        # Files without both a suffix and an extension can't inherit
        if suffix is None or ext is None:
            continue

        # Extract metadata associated with the file. The idea is that we
        # loop over parent directories, and if we find payloads in the
        # file_data store (indexing by directory and current file suffix),
        # we check to see if the candidate JSON file's entities are
        # entirely consumed by the current file. If so, it's a valid
        # candidate, and we add the payload to the stack. Finally, we
        # invert the stack and merge the payloads in order.
        ext_key = "{}/{}".format(ext, suffix)
        json_key = "json/{}".format(suffix)
        dirname = bf.dirname

        payloads = []
        ancestors = []

        while True:
            # Get JSON payloads at the current directory level
            json_data = file_data.get(json_key, {}).get(dirname, [])
            for js_ents, js_md, js_path in json_data:
                js_keys = set(js_ents.keys())
                # A sidecar qualifies only if all of its entities are a
                # subset of the current file's entities...
                if js_keys - file_ent_keys:
                    continue
                # ...and every shared entity value matches exactly
                matches = [js_ents[name] == file_ents[name]
                           for name in js_keys]
                if all(matches):
                    payloads.append((js_md, js_path))

            # Get all files this file inherits from (same ext/suffix)
            candidates = file_data.get(ext_key, {}).get(dirname, [])
            for ents, _, path in candidates:
                keys = set(ents.keys())
                if keys - file_ent_keys:
                    continue
                matches = [ents[name] == file_ents[name] for name in keys]
                if all(matches):
                    ancestors.append(path)

            parent = os.path.dirname(dirname)
            # Stop once dirname no longer changes (filesystem root)
            if parent == dirname:
                break
            dirname = parent

        if not payloads:
            continue

        # Create DB records for metadata associations; the last payload
        # found is the most distant (top-most) sidecar
        js_file = payloads[-1][1]
        create_association_pair(js_file, bf.path, 'Metadata')

        # Consolidate metadata by looping over inherited JSON files in
        # reverse order, so nearer sidecars override more distant ones
        file_md = {}
        for pl, js_file in payloads[::-1]:
            file_md.update(pl)

        # Create FileAssociation records for JSON inheritance
        n_pl = len(payloads)
        for i, (pl, js_file) in enumerate(payloads):
            if (i + 1) < n_pl:
                other = payloads[i + 1][1]
                create_association_pair(js_file, other, 'Child', 'Parent')

        # Inheritance for current file
        n_pl = len(ancestors)
        for i, src in enumerate(ancestors):
            if (i + 1) < n_pl:
                dst = ancestors[i + 1]
                create_association_pair(src, dst, 'Child', 'Parent')

        # Files with IntendedFor field always get mapped to targets
        intended = listify(file_md.get('IntendedFor', []))
        for target in intended:
            # Per spec, IntendedFor paths are relative to sub dir.
            target = os.path.join(
                self.root, 'sub-{}'.format(bf.entities['subject']), target)
            create_association_pair(bf.path, target, 'IntendedFor',
                                    'InformedBy')

        # Link files to BOLD runs
        if suffix in ['physio', 'stim', 'events', 'sbref']:
            images = self.layout.get(
                extension=['nii', 'nii.gz'], suffix='bold',
                return_type='filename', **file_ents)
            for img in images:
                create_association_pair(bf.path, img, 'IntendedFor',
                                        'InformedBy')

        # Link files to DWI runs
        if suffix == 'sbref' or ext in ['bvec', 'bval']:
            images = self.layout.get(
                extension=['nii', 'nii.gz'], suffix='dwi',
                return_type='filename', **file_ents)
            for img in images:
                create_association_pair(bf.path, img, 'IntendedFor',
                                        'InformedBy')

        # Create Tag <-> Entity mappings, and any newly discovered Entities
        for md_key, md_val in file_md.items():
            tag_string = '{}_{}'.format(bf.path, md_key)
            # Skip pairs that were already found in the filenames
            if tag_string in all_tags:
                file_val = all_tags[tag_string]
                if str(md_val) != file_val:
                    msg = (
                        "Conflicting values found for entity '{}' in "
                        "filename {} (value='{}') versus its JSON sidecar "
                        "(value='{}'). Please reconcile this discrepancy.")
                    raise ValueError(msg.format(md_key, bf.path, file_val,
                                                md_val))
                continue
            if md_key not in all_entities:
                all_entities[md_key] = Entity(md_key, is_metadata=True)
                self.session.add(all_entities[md_key])
            tag = Tag(bf, all_entities[md_key], md_val)
            self.session.add(tag)

        # Flush periodically to keep the pending-object set bounded
        if len(self.session.new) >= 1000:
            self.session.commit()

    self.session.commit()