Esempio n. 1
0
 def __init__(self, repos=None):
     """
     :param repos: Path to a clone or source dump of concepticon-data.
     """
     if (repos is None) and cldfcatalog:
         repos = cldfcatalog.Config.from_file().get_clone('concepticon')
     API.__init__(self, repos)
     self._to_mapping = {}
Esempio n. 2
0
 def __init__(self,
              repos: typing.Optional[typing.Union[str,
                                                  pathlib.Path]] = None):
     """
     :param repos: Path to a clone or source dump of concepticon-data.
     """
     repos = repos or cldfcatalog.Config.from_file().get_clone(
         'concepticon')
     API.__init__(self, repos)
     self._to_mapping = {}
Esempio n. 3
0
 def __init__(self, repos='.'):
     API.__init__(self, repos=repos)
     self.repos = self.repos.resolve()
     self.tree = self.repos / 'languoids' / 'tree'
     if not self.tree.exists():
         raise ValueError('repos dir %s missing tree dir: %s' % (self.repos, self.tree))
     if not self.repos.joinpath('references').exists():
         raise ValueError('repos dir %s missing references subdir' % (self.repos,))
     for name, cls in self.__config__.items():
         fname = self.repos / 'config' / (name + '.ini')
         setattr(self, name, config.Config.from_ini(fname, object_class=cls))
Esempio n. 4
0
 def __init__(self, repos='.', *, cache: bool = False):
     """
     :param repos: Path to a copy of `<https://github.com/glottolog/glottolog>`_
     :param cache: Indicate whether to cache `Languoid` objects or not. If `True`, the API must \
     be used read-only.
     """
     API.__init__(self, repos=repos)
     #: Absolute path to the copy of the data repository:
     self.repos: pathlib.Path = pathlib.Path.cwd() / self.repos
     #: Absolute path to the `tree` directory in the repos.
     self.tree: pathlib.Path = self.repos / 'languoids' / 'tree'
     if not self.tree.exists():
         raise ValueError('repos dir %s missing tree dir: %s' %
                          (self.repos, self.tree))
     if not self.repos.joinpath('references').exists():
         raise ValueError('repos dir %s missing references subdir' %
                          (self.repos, ))
     self.cache = Cache() if cache else None
Esempio n. 5
0
File: api.py Progetto: clics/pyclics
    def __init__(self, repos=None):
        """
        Initialise the API and set up the global component registry with plugins.

        :param repos: Path of the repository; passed on to `API.__init__`.
        """
        API.__init__(self, repos=repos)

        # The component registry is the global site manager.
        registry = getGlobalSiteManager()
        self.gsm = registry

        # Bind the registration helpers to the registry object itself, so plugins can
        # call them as methods and don't have to deal with ZCA details at all.
        registry.register_clusterer = register_clusterer.__get__(registry)
        registry.register_colexifier = register_colexifier.__get__(registry)
        # NOTE(review): the function is spelled `register_clicsfom` while the attribute
        # is `register_clicsform` - confirm this matches the name defined/imported
        # elsewhere in the file.
        registry.register_clicsform = register_clicsfom.__get__(registry)

        # Defaults for pluggable functionality are loaded before anything else ...
        plugin.includeme(registry)

        # ... then every third-party plugin advertised under the 'clics.plugin' entry
        # point group is loaded and called with the registry.
        for entry_point in pkg_resources.iter_entry_points('clics.plugin'):
            include = entry_point.load()
            include(registry)
Esempio n. 6
0
 def __init__(self, repos=None):
     """
     :param repos: Path to a clone or source dump of concepticon-data.
     """
     API.__init__(self, repos or REPOS_PATH)
     self._to_mapping = {}
Esempio n. 7
0
 def __init__(self, repos, wiki=None):
     API.__init__(self, repos)
     self.wiki = wiki or self.repos.resolve().parent / 'grambank.wiki'
     self.gb20 = GB20(self.path('gb20.txt'))
Esempio n. 8
0
    def __init__(self, repos=None, datasets=None, concepticon=None):
        API.__init__(self, repos)
        self.datasets = datasets or collections.OrderedDict()

        concepticon = concepticon
        if not concepticon:  # pragma: no cover
            try:
                concepticon = Concepticon(
                    Config.from_file().get_clone('concepticon'))
            except KeyError:
                pass

        datasets = set()
        self.annotations = collections.defaultdict(
            lambda: collections.OrderedDict())
        for row in reader(self.repos / 'norare.tsv',
                          delimiter='\t',
                          dicts=True):
            self.annotations[row['DATASET']][row['NAME'].lower()] = {
                k.lower(): row[k]
                for k in [
                    'DATASET', 'NAME', 'LANGUAGE', 'STRUCTURE', 'TYPE',
                    'NORARE', 'RATING', 'SOURCE', 'OTHER', 'NOTE'
                ]
            }
            datasets.add(row['DATASET'])

        # get bibliography
        self.refs = collections.OrderedDict()
        with self.repos.joinpath(
                'references', 'references.bib').open(encoding='utf-8') as fp:
            for key, entry in pybtex.database.parse_string(
                    fp.read(), bib_format='bibtex').entries.items():
                self.refs[key] = Source.from_entry(key, entry)

        all_refs = set(self.refs)
        if concepticon:
            all_refs = all_refs.union(concepticon.bibliography)

        for row in reader(self.repos / 'concept_set_meta.tsv',
                          delimiter='\t',
                          dicts=True):
            row['norare'] = self
            row['path'] = self.repos.joinpath('concept_set_meta', row['ID'],
                                              row['ID'] + '.tsv-metadata.json')
            self.datasets[row['ID']] = ConceptSetMeta(
                **{k.lower(): v
                   for k, v in row.items()})
            self.datasets[row['ID']].source_language = [
                lg.lower().strip()
                for lg in self.datasets[row['ID']].source_language.split(',')
            ]

        # remaining datasets come from concepticon, we identify them from datasets
        concepticon_datasets = [d for d in datasets if d not in self.datasets]
        for dataset in concepticon_datasets:
            ds = concepticon.conceptlists[dataset]
            self.datasets[ds.id] = ConceptSetMeta(
                id=ds.id,
                author=ds.author,
                year=ds.year,
                tags=', '.join(ds.tags),
                source_language=ds.source_language,
                target_language=ds.target_language,
                url=ds.url,
                refs=ds.refs,
                note=ds.note,
                alias=ds.alias,
                norare=self,
                path=concepticon.repos.joinpath('concepticondata',
                                                'conceptlists',
                                                ds.id + '.tsv-metadata.json'))

        for dataset in self.datasets.values():
            if dataset.refs:
                refs = [dataset.refs] if isinstance(dataset.refs,
                                                    str) else dataset.refs
                for ref in refs:
                    if ref not in all_refs:  # pragma: no cover
                        raise ValueError(
                            'missing references.bib: {}'.format(ref))