def test_Source_from_entry(mocker):
    """``Source.from_entry`` handles entries both without and with persons."""
    entry_without_persons = mocker.Mock(
        type='misc', fields={'title': 'abc'}, persons=None)
    src = Source.from_entry('xyz', entry_without_persons)
    assert src.id == 'xyz'
    assert src.genre == 'misc'
    assert 'author' not in src
    assert src['title'] == 'abc'
    assert '{0}'.format(src) == 'n.d. abc.'
    assert repr(src) == '<Source xyz>'

    entry_with_persons = mocker.Mock(
        type='misc',
        fields={'title': 'abc'},
        persons={'author': ['Alfred E. Neumann', 'T. M.']})
    src = Source.from_entry('xyz', entry_with_persons)
    assert src['author'] == 'Alfred E. Neumann and T. M.'
def bibliography(self) -> typing.Dict[str, Source]:
    """
    :returns: `dict` mapping BibTeX IDs to `Reference` instances.
    """
    bibtex = self.bibfile.read_text(encoding='utf8')
    db = pybtex.database.parse_string(bibtex, bib_format='bibtex')
    return to_dict(
        Source.from_entry(key, entry) for key, entry in db.entries.items())
def bibliography(self):
    """
    :returns: `dict` mapping BibTeX IDs to `Reference` instances.
    """
    with self.bibfile.open(encoding='utf8') as fp:
        db = pybtex.database.parse_string(fp.read(), bib_format='bibtex')
        sources = [
            Source.from_entry(key, entry) for key, entry in db.entries.items()]
    return to_dict(sources)
def __init__(self, repos=None, datasets=None, concepticon=None):
    """
    Initialize the NoRaRe API object.

    :param repos: Path to the norare data repository (passed to `API.__init__`).
    :param datasets: Optional pre-populated mapping of dataset ID to \
    `ConceptSetMeta`; defaults to an empty ordered dict.
    :param concepticon: Optional `Concepticon` API instance; when not given, \
    one is looked up via the cldfcatalog config (and silently skipped if no \
    clone is configured).
    :raises ValueError: If a dataset lists a reference that is neither in the \
    local references.bib nor in the Concepticon bibliography.
    """
    API.__init__(self, repos)
    self.datasets = datasets or collections.OrderedDict()
    # Fixed: removed the no-op self-assignment `concepticon = concepticon`.
    if not concepticon:  # pragma: no cover
        try:
            concepticon = Concepticon(
                Config.from_file().get_clone('concepticon'))
        except KeyError:
            # No configured concepticon clone; proceed without one.
            pass
    # NOTE: deliberately shadows the `datasets` parameter (already consumed
    # above) — from here on `datasets` collects the IDs seen in norare.tsv.
    datasets = set()
    self.annotations = collections.defaultdict(
        lambda: collections.OrderedDict())
    for row in reader(self.repos / 'norare.tsv', delimiter='\t', dicts=True):
        self.annotations[row['DATASET']][row['NAME'].lower()] = {
            k.lower(): row[k] for k in [
                'DATASET', 'NAME', 'LANGUAGE', 'STRUCTURE', 'TYPE',
                'NORARE', 'RATING', 'SOURCE', 'OTHER', 'NOTE'
            ]
        }
        datasets.add(row['DATASET'])
    # get bibliography
    self.refs = collections.OrderedDict()
    with self.repos.joinpath(
            'references', 'references.bib').open(encoding='utf-8') as fp:
        for key, entry in pybtex.database.parse_string(
                fp.read(), bib_format='bibtex').entries.items():
            self.refs[key] = Source.from_entry(key, entry)
    # All reference IDs a dataset may legitimately cite: local bib plus
    # (when available) the Concepticon bibliography.
    all_refs = set(self.refs)
    if concepticon:
        all_refs = all_refs.union(concepticon.bibliography)
    for row in reader(
            self.repos / 'concept_set_meta.tsv', delimiter='\t', dicts=True):
        row['norare'] = self
        row['path'] = self.repos.joinpath(
            'concept_set_meta', row['ID'], row['ID'] + '.tsv-metadata.json')
        self.datasets[row['ID']] = ConceptSetMeta(
            **{k.lower(): v for k, v in row.items()})
        # Normalize the comma-separated source language field to a list.
        self.datasets[row['ID']].source_language = [
            lg.lower().strip()
            for lg in self.datasets[row['ID']].source_language.split(',')
        ]
    # remaining datasets come from concepticon, we identify them from datasets
    concepticon_datasets = [d for d in datasets if d not in self.datasets]
    for dataset in concepticon_datasets:
        ds = concepticon.conceptlists[dataset]
        self.datasets[ds.id] = ConceptSetMeta(
            id=ds.id,
            author=ds.author,
            year=ds.year,
            tags=', '.join(ds.tags),
            source_language=ds.source_language,
            target_language=ds.target_language,
            url=ds.url,
            refs=ds.refs,
            note=ds.note,
            alias=ds.alias,
            norare=self,
            path=concepticon.repos.joinpath(
                'concepticondata', 'conceptlists',
                ds.id + '.tsv-metadata.json'))
    # Validate that every cited reference is resolvable.
    for dataset in self.datasets.values():
        if dataset.refs:
            refs = [dataset.refs] if isinstance(dataset.refs, str) \
                else dataset.refs
            for ref in refs:
                if ref not in all_refs:  # pragma: no cover
                    raise ValueError(
                        'missing references.bib: {}'.format(ref))