Esempio n. 1
0
def test_Source_from_entry(mocker):
    """Check how ``Source.from_entry`` converts a mocked bibliography entry.

    Two mocked entries are exercised: one whose ``persons`` is ``None`` and
    one that lists two authors.
    """
    # Entry without persons: the resulting source must not carry an author.
    entry_without_persons = mocker.Mock(
        type='misc', fields={'title': 'abc'}, persons=None)
    source = Source.from_entry('xyz', entry_without_persons)

    assert source.id == 'xyz'
    assert source.genre == 'misc'
    assert 'author' not in source
    assert source['title'] == 'abc'
    # Both the formatted and the repr form are pinned.
    assert '{0}'.format(source) == 'n.d. abc.'
    assert repr(source) == '<Source xyz>'

    # Entry with two authors: the names end up joined with ' and '.
    entry_with_authors = mocker.Mock(
        type='misc',
        fields={'title': 'abc'},
        persons={'author': ['Alfred E. Neumann', 'T. M.']})
    source = Source.from_entry('xyz', entry_with_authors)

    assert source['author'] == 'Alfred E. Neumann and T. M.'
Esempio n. 2
0
 def bibliography(self) -> typing.Dict[str, Source]:
     """
     :returns: `dict` mapping BibTeX IDs to `Reference` instances.
     """
     return to_dict(
         Source.from_entry(key, entry)
         for key, entry in pybtex.database.parse_string(
             self.bibfile.read_text(
                 encoding='utf8'), bib_format='bibtex').entries.items())
Esempio n. 3
0
 def bibliography(self):
     """
     :returns: `dict` mapping BibTeX IDs to `Reference` instances.
     """
     refs = []
     with self.bibfile.open(encoding='utf8') as fp:
         for key, entry in pybtex.database.parse_string(
                 fp.read(), bib_format='bibtex').entries.items():
             refs.append(Source.from_entry(key, entry))
     return to_dict(refs)
Esempio n. 4
0
    def __init__(self, repos=None, datasets=None, concepticon=None):
        API.__init__(self, repos)
        self.datasets = datasets or collections.OrderedDict()

        concepticon = concepticon
        if not concepticon:  # pragma: no cover
            try:
                concepticon = Concepticon(
                    Config.from_file().get_clone('concepticon'))
            except KeyError:
                pass

        datasets = set()
        self.annotations = collections.defaultdict(
            lambda: collections.OrderedDict())
        for row in reader(self.repos / 'norare.tsv',
                          delimiter='\t',
                          dicts=True):
            self.annotations[row['DATASET']][row['NAME'].lower()] = {
                k.lower(): row[k]
                for k in [
                    'DATASET', 'NAME', 'LANGUAGE', 'STRUCTURE', 'TYPE',
                    'NORARE', 'RATING', 'SOURCE', 'OTHER', 'NOTE'
                ]
            }
            datasets.add(row['DATASET'])

        # get bibliography
        self.refs = collections.OrderedDict()
        with self.repos.joinpath(
                'references', 'references.bib').open(encoding='utf-8') as fp:
            for key, entry in pybtex.database.parse_string(
                    fp.read(), bib_format='bibtex').entries.items():
                self.refs[key] = Source.from_entry(key, entry)

        all_refs = set(self.refs)
        if concepticon:
            all_refs = all_refs.union(concepticon.bibliography)

        for row in reader(self.repos / 'concept_set_meta.tsv',
                          delimiter='\t',
                          dicts=True):
            row['norare'] = self
            row['path'] = self.repos.joinpath('concept_set_meta', row['ID'],
                                              row['ID'] + '.tsv-metadata.json')
            self.datasets[row['ID']] = ConceptSetMeta(
                **{k.lower(): v
                   for k, v in row.items()})
            self.datasets[row['ID']].source_language = [
                lg.lower().strip()
                for lg in self.datasets[row['ID']].source_language.split(',')
            ]

        # remaining datasets come from concepticon, we identify them from datasets
        concepticon_datasets = [d for d in datasets if d not in self.datasets]
        for dataset in concepticon_datasets:
            ds = concepticon.conceptlists[dataset]
            self.datasets[ds.id] = ConceptSetMeta(
                id=ds.id,
                author=ds.author,
                year=ds.year,
                tags=', '.join(ds.tags),
                source_language=ds.source_language,
                target_language=ds.target_language,
                url=ds.url,
                refs=ds.refs,
                note=ds.note,
                alias=ds.alias,
                norare=self,
                path=concepticon.repos.joinpath('concepticondata',
                                                'conceptlists',
                                                ds.id + '.tsv-metadata.json'))

        for dataset in self.datasets.values():
            if dataset.refs:
                refs = [dataset.refs] if isinstance(dataset.refs,
                                                    str) else dataset.refs
                for ref in refs:
                    if ref not in all_refs:  # pragma: no cover
                        raise ValueError(
                            'missing references.bib: {}'.format(ref))