Code Example #1
File: test_layout.py  Project: pndni/pybids
def test_parse_file_entities():
    filename = '/sub-03_ses-07_run-4_desc-bleargh_sekret.nii.gz'

    # Test with entities taken from bids config
    target = {'subject': '03', 'session': '07', 'run': 4, 'suffix': 'sekret',
              'extension': 'nii.gz'}
    assert target == parse_file_entities(filename, config='bids')
    config = Config.load('bids')
    assert target == parse_file_entities(filename, config=[config])

    # Test with entities taken from bids and derivatives config
    target = {'subject': '03', 'session': '07', 'run': 4, 'suffix': 'sekret',
              'desc': 'bleargh', 'extension': 'nii.gz'}
    assert target == parse_file_entities(filename)
    assert target == parse_file_entities(filename, config=['bids', 'derivatives'])

    # Test with list of Entities
    entities = [
        Entity('subject', "[/\\\\]sub-([a-zA-Z0-9]+)"),
        Entity('run', "[_/\\\\]run-0*(\\d+)", dtype=int),
        Entity('suffix', "[._]*([a-zA-Z0-9]*?)\\.[^/\\\\]+$"),
        Entity('desc', "desc-([a-zA-Z0-9]+)"),
    ]
    # Leave out session to distinguish from previous test target
    target = {'subject': '03', 'run': 4, 'suffix': 'sekret', 'desc': 'bleargh'}
    assert target == parse_file_entities(filename, entities=entities)
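
These snippets presuppose the pybids layout API plus pytest and SQLAlchemy. A minimal import sketch (module paths assumed from the pybids source these tests come from; verify them against your installed version), followed by a direct call on the sample filename:

import os
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from bids.layout import parse_file_entities
from bids.layout.models import (Base, BIDSFile, Config, Entity,
                                FileAssociation, Tag)

# Direct call on the filename used above:
parse_file_entities('/sub-03_ses-07_run-4_desc-bleargh_sekret.nii.gz')
# -> {'subject': '03', 'session': '07', 'run': 4, 'suffix': 'sekret',
#     'desc': 'bleargh', 'extension': 'nii.gz'}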
Code Example #2
File: test_models.py  Project: fedeadolfi/pybids
def test_entity_matches(tmpdir):
    filename = "aardvark-4-reporting-for-duty.txt"
    tmpdir.mkdir("tmp").join(filename).write("###")
    f = BIDSFile(os.path.join(str(tmpdir), filename))
    e = Entity('avaricious', r'aardvark-(\d+)')
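    # match_file applies the entity's regex to the file's path string and
    # returns the captured group; it never reads the file's contents, so
    # the path mismatch with the file written above is harmless.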
    result = e.match_file(f)
    assert result == '4'
Code Example #3
File: test_models.py  Project: fedeadolfi/pybids
def test_entity_initialization():
    e = Entity('avaricious', r'aardvark-(\d+)')
    assert e.name == 'avaricious'
    assert e.pattern == r'aardvark-(\d+)'
    assert not e.mandatory
    assert e.directory is None
    assert e.files == {}
Code Example #4
File: test_models.py  Project: fedeadolfi/pybids
def test_entity_add_file(sample_bidsfile):
    session = create_session()
    bf = sample_bidsfile
    e = Entity('prop', r'-(\d+)')
    t = Tag(file=bf, entity=e, value=4)
    session.add_all([t, e, bf])
    session.commit()
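    # Entity.files is an association proxy over the committed Tags,
    # mapping each file path to that file's value for this entity.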
    assert e.files[bf.path] == 4
Code Example #5
File: test_writing.py  Project: pndni/pybids
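# Presumably a pytest fixture in the original test module (the decorator
# appears to have been dropped by the excerpt):
@pytest.fixture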
def writable_file(tmpdir):
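    # 'sqlite://' with no path gives SQLAlchemy an in-memory database,
    # so the test starts from an empty schema.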
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    testfile = 'sub-03_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz'
    fn = tmpdir.mkdir("tmp").join(testfile)
    fn.write('###')
    bf = BIDSFile(str(fn))

    tag_dict = {'task': 'rest', 'run': 2, 'subject': '3'}
    ents = {name: Entity(name) for name in tag_dict.keys()}
    tags = [Tag(bf, ents[k], value=v) for k, v in tag_dict.items()]

    session.add_all(list(ents.values()) + tags + [bf])
    session.commit()
    return bf
Code Example #6
File: test_models.py  Project: fedeadolfi/pybids
def test_entity_init_with_bad_dtype():
    with pytest.raises(ValueError) as exc:
        ent = Entity('test', dtype='superfloat')
    # pytest only populates the ExceptionInfo after the with block exits,
    # so the assertion must sit outside it.
    msg = str(exc.value)
    assert msg.startswith("Invalid dtype")
Code Example #7
File: test_models.py  Project: fedeadolfi/pybids
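# As above, presumably a pytest fixture in the original module:
@pytest.fixture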
def subject_entity():
    return Entity('subject', r"[/\\]sub-([a-zA-Z0-9]+)", mandatory=False,
                  directory="{subject}", dtype='str')
Code Example #8
File: utils.py  Project: marcelfarres/NiBetaSeries
    def index_metadata(self, **filters):
        """Index metadata for all files in the BIDS dataset. """
        # Process JSON files first if we're indexing metadata
        all_files = self.layout.get(absolute_paths=True, **filters)

        # Track ALL entities we've seen in file names or metadata
        all_entities = {}
        for c in self.config:
            all_entities.update(c.entities)

        # If key/value pairs in JSON files duplicate ones extracted from files,
        # we can end up with Tag collisions in the DB. To prevent this, we
        # store all filename/entity pairs and the value, and then check against
        # that before adding each new Tag.
        all_tags = {}
        for t in self.session.query(Tag).all():
            key = '{}_{}'.format(t.file_path, t.entity_name)
            all_tags[key] = str(t.value)

        # We build up a store of all file data as we iterate files. It looks
        # like: {extension/suffix: {dirname: [(entities, payload, path)]}}.
        # The payload is left empty for non-JSON files.
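        # e.g. {'nii.gz/bold': {'/ds/sub-01/func':
        #       [(ents, None, '/ds/sub-01/func/sub-01_task-rest_bold.nii.gz')]}}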
        file_data = {}

        for bf in all_files:
            file_ents = bf.entities.copy()
            suffix = file_ents.pop('suffix', None)
            ext = file_ents.pop('extension', None)

            if suffix is not None and ext is not None:
                key = "{}/{}".format(ext, suffix)
                if key not in file_data:
                    file_data[key] = defaultdict(list)

                if ext == 'json':
                    with open(bf.path, 'r') as handle:
                        try:
                            payload = json.load(handle)
                        except json.JSONDecodeError as e:
                            msg = ("Error occurred while trying to decode JSON"
                                   " from file '{}'.".format(bf.path))
                            raise IOError(msg) from e
                else:
                    payload = None

                to_store = (file_ents, payload, bf.path)
                file_data[key][bf.dirname].append(to_store)

        # To avoid integrity errors, track primary keys we've seen
        seen_assocs = set()

        def create_association_pair(src, dst, kind, kind2=None):
            kind2 = kind2 or kind
            pk1 = '#'.join([src, dst, kind])
            if pk1 not in seen_assocs:
                self.session.add(FileAssociation(src=src, dst=dst, kind=kind))
                seen_assocs.add(pk1)
            pk2 = '#'.join([dst, src, kind2])
            if pk2 not in seen_assocs:
                self.session.add(FileAssociation(src=dst, dst=src, kind=kind2))
                seen_assocs.add(pk2)

        # TODO: Efficiency of everything in this loop could be improved
        filenames = [bf for bf in all_files if not bf.path.endswith('.json')]

        for bf in filenames:
            file_ents = bf.entities.copy()
            suffix = file_ents.pop('suffix', None)
            ext = file_ents.pop('extension', None)
            file_ent_keys = set(file_ents.keys())

            if suffix is None or ext is None:
                continue

            # Extract metadata associated with the file. The idea is
            # that we loop over parent directories, and if we find
            # payloads in the file_data store (indexing by directory
            # and current file suffix), we check to see if the
            # candidate JSON file's entities are entirely consumed by
            # the current file. If so, it's a valid candidate, and we
            # add the payload to the stack. Finally, we invert the
            # stack and merge the payloads in order.
            ext_key = "{}/{}".format(ext, suffix)
            json_key = "json/{}".format(suffix)
            dirname = bf.dirname

            payloads = []
            ancestors = []

            while True:
                # Get JSON payloads
                json_data = file_data.get(json_key, {}).get(dirname, [])
                for js_ents, js_md, js_path in json_data:
                    js_keys = set(js_ents.keys())
                    if js_keys - file_ent_keys:
                        continue
                    matches = [
                        js_ents[name] == file_ents[name] for name in js_keys
                    ]
                    if all(matches):
                        payloads.append((js_md, js_path))

                # Get all files this file inherits from
                candidates = file_data.get(ext_key, {}).get(dirname, [])
                for ents, _, path in candidates:
                    keys = set(ents.keys())
                    if keys - file_ent_keys:
                        continue
                    matches = [ents[name] == file_ents[name] for name in keys]
                    if all(matches):
                        ancestors.append(path)

                parent = os.path.dirname(dirname)
                if parent == dirname:
                    break
                dirname = parent

            if not payloads:
                continue

            # Create DB records for metadata associations
            js_file = payloads[-1][1]
            create_association_pair(js_file, bf.path, 'Metadata')

            # Consolidate metadata by looping over inherited JSON files
            file_md = {}
            for pl, js_file in payloads[::-1]:
                file_md.update(pl)

            # Create FileAssociation records for JSON inheritance
            n_pl = len(payloads)
            for i, (pl, js_file) in enumerate(payloads):
                if (i + 1) < n_pl:
                    other = payloads[i + 1][1]
                    create_association_pair(js_file, other, 'Child', 'Parent')

            # Inheritance for current file
            n_pl = len(ancestors)
            for i, src in enumerate(ancestors):
                if (i + 1) < n_pl:
                    dst = ancestors[i + 1]
                    create_association_pair(src, dst, 'Child', 'Parent')

            # Files with IntendedFor field always get mapped to targets
            intended = listify(file_md.get('IntendedFor', []))
            for target in intended:
                # Per spec, IntendedFor paths are relative to the subject dir.
                target = os.path.join(self.root,
                                      'sub-{}'.format(bf.entities['subject']),
                                      target)
                create_association_pair(bf.path, target, 'IntendedFor',
                                        'InformedBy')

            # Link files to BOLD runs
            if suffix in ['physio', 'stim', 'events', 'sbref']:
                images = self.layout.get(extension=['nii', 'nii.gz'],
                                         suffix='bold',
                                         return_type='filename',
                                         **file_ents)
                for img in images:
                    create_association_pair(bf.path, img, 'IntendedFor',
                                            'InformedBy')

            # Link files to DWI runs
            if suffix == 'sbref' or ext in ['bvec', 'bval']:
                images = self.layout.get(extension=['nii', 'nii.gz'],
                                         suffix='dwi',
                                         return_type='filename',
                                         **file_ents)
                for img in images:
                    create_association_pair(bf.path, img, 'IntendedFor',
                                            'InformedBy')

            # Create Tag <-> Entity mappings, and any newly discovered Entities
            for md_key, md_val in file_md.items():
                tag_string = '{}_{}'.format(bf.path, md_key)
                # Skip pairs that were already found in the filenames
                if tag_string in all_tags:
                    file_val = all_tags[tag_string]
                    if str(md_val) != file_val:
                        msg = (
                            "Conflicting values found for entity '{}' in "
                            "filename {} (value='{}') versus its JSON sidecar "
                            "(value='{}'). Please reconcile this discrepancy.")
                        raise ValueError(
                            msg.format(md_key, bf.path, file_val, md_val))
                    continue
                if md_key not in all_entities:
                    all_entities[md_key] = Entity(md_key, is_metadata=True)
                    self.session.add(all_entities[md_key])
                tag = Tag(bf, all_entities[md_key], md_val)
                self.session.add(tag)

            if len(self.session.new) >= 1000:
                self.session.commit()

        self.session.commit()
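
For context, this method mirrors pybids' BIDSLayoutIndexer.index_metadata and is normally driven by constructing a BIDSLayout rather than called directly. A minimal usage sketch ('/data/bids' is a placeholder dataset path; the extension spelling accepted by get() varies across pybids versions):

from bids import BIDSLayout

# Building the layout indexes filenames and, by default, sidecar metadata,
# which is what runs index_metadata above.
layout = BIDSLayout('/data/bids')

# Metadata consolidated through the JSON inheritance walk is then
# available per file:
bold = layout.get(suffix='bold', extension='nii.gz')[0]
print(layout.get_metadata(bold.path))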