def __init__(self, repos=None):
    """
    :param repos: Path to a clone or source dump of concepticon-data.
    """
    if repos is None and cldfcatalog:
        # No explicit path given: look up the 'concepticon' clone registered
        # in the cldfcatalog configuration file.
        cfg = cldfcatalog.Config.from_file()
        repos = cfg.get_clone('concepticon')
    API.__init__(self, repos)
    self._to_mapping = {}
def __init__(self, repos: typing.Optional[typing.Union[str, pathlib.Path]] = None):
    """
    :param repos: Path to a clone or source dump of concepticon-data. If ``None``, \
    the 'concepticon' clone registered in the cldfcatalog config file is used.
    """
    # Test for None explicitly (not truthiness): the annotation declares
    # Optional[...], and a falsy-but-valid value such as '' should be passed
    # through to API.__init__ rather than silently replaced by the catalog clone.
    if repos is None:
        repos = cldfcatalog.Config.from_file().get_clone('concepticon')
    API.__init__(self, repos)
    self._to_mapping = {}
def __init__(self, repos='.'):
    """Initialize the API from a repository clone, validating its directory layout."""
    API.__init__(self, repos=repos)
    self.repos = self.repos.resolve()
    self.tree = self.repos.joinpath('languoids', 'tree')
    # A usable clone must contain both the languoid tree and the references dir.
    if not self.tree.exists():
        raise ValueError('repos dir %s missing tree dir: %s' % (self.repos, self.tree))
    if not self.repos.joinpath('references').exists():
        raise ValueError('repos dir %s missing references subdir' % (self.repos,))
    # Attach one parsed config object per section declared in __config__.
    for section, object_class in self.__config__.items():
        ini_path = self.repos / 'config' / (section + '.ini')
        setattr(self, section, config.Config.from_ini(ini_path, object_class=object_class))
def __init__(self, repos='.', *, cache: bool = False):
    """
    :param repos: Path to a copy of `<https://github.com/glottolog/glottolog>`_
    :param cache: Indicate whether to cache `Languoid` objects or not. If `True`, the API must \
    be used read-only.
    """
    API.__init__(self, repos=repos)
    #: Absolute path to the copy of the data repository:
    self.repos: pathlib.Path = pathlib.Path.cwd() / self.repos
    #: Absolute path to the `tree` directory in the repos.
    self.tree: pathlib.Path = self.repos / 'languoids' / 'tree'
    # Validate the clone layout before doing anything else.
    if not self.tree.exists():
        raise ValueError('repos dir %s missing tree dir: %s' % (self.repos, self.tree))
    if not self.repos.joinpath('references').exists():
        raise ValueError('repos dir %s missing references subdir' % (self.repos,))
    if cache:
        self.cache = Cache()
    else:
        self.cache = None
def __init__(self, repos=None): API.__init__(self, repos=repos) # Initialize component registry: self.gsm = getGlobalSiteManager() # Add methods to register utilities to the registry, so plugins don't have to deal with # ZCA details at all: self.gsm.register_clusterer = register_clusterer.__get__(self.gsm) self.gsm.register_colexifier = register_colexifier.__get__(self.gsm) self.gsm.register_clicsform = register_clicsfom.__get__(self.gsm) # Load defaults for pluggable functionality first: plugin.includeme(self.gsm) # Now load third-party plugins: for ep in pkg_resources.iter_entry_points('clics.plugin'): ep.load()(self.gsm)
def __init__(self, repos=None):
    """
    :param repos: Path to a clone or source dump of concepticon-data.
    """
    # Fall back to the packaged default location when no (truthy) path is given.
    API.__init__(self, repos if repos else REPOS_PATH)
    self._to_mapping = {}
def test_API():
    # Smoke-test the base API against the current working directory.
    api = API('.')
    assert api.repos.exists()
    assert 'repository' in str(api)
    assert not api.path('unknown', 'path').exists()
def __init__(self, repos, wiki=None):
    """
    :param repos: Path to the grambank data repository.
    :param wiki: Path to the grambank wiki clone; defaults to a 'grambank.wiki' \
    directory next to the repos clone.
    """
    API.__init__(self, repos)
    # Lazily compute the default so the fallback path is only resolved when needed.
    self.wiki = wiki if wiki else self.repos.resolve().parent / 'grambank.wiki'
    self.gb20 = GB20(self.path('gb20.txt'))
def __init__(self, repos=None, datasets=None, concepticon=None):
    """
    Load the NoRaRe dataset collection from a data repository.

    :param repos: Path to a clone of the norare data repository.
    :param datasets: Optional pre-populated mapping of dataset ID to dataset object; \
    defaults to an empty ordered dict that is filled below.
    :param concepticon: Optional Concepticon API instance; if not given, one is \
    created from the 'concepticon' clone registered in the cldfcatalog config \
    (silently skipped if no such clone is configured).
    """
    API.__init__(self, repos)
    self.datasets = datasets or collections.OrderedDict()
    # NOTE(review): self-assignment is a no-op — presumably a refactoring leftover;
    # confirm before removing.
    concepticon = concepticon
    if not concepticon:  # pragma: no cover
        try:
            concepticon = Concepticon(
                Config.from_file().get_clone('concepticon'))
        except KeyError:
            # No 'concepticon' clone configured; proceed without it.
            pass
    # Dataset IDs seen in norare.tsv (reused below to find concepticon-only datasets).
    datasets = set()
    self.annotations = collections.defaultdict(
        lambda: collections.OrderedDict())
    # Read per-variable annotations: one mapping per dataset, keyed by lowercased name.
    for row in reader(self.repos / 'norare.tsv', delimiter='\t', dicts=True):
        self.annotations[row['DATASET']][row['NAME'].lower()] = {
            k.lower(): row[k] for k in [
                'DATASET', 'NAME', 'LANGUAGE', 'STRUCTURE', 'TYPE',
                'NORARE', 'RATING', 'SOURCE', 'OTHER', 'NOTE'
            ]
        }
        datasets.add(row['DATASET'])
    # get bibliography
    self.refs = collections.OrderedDict()
    with self.repos.joinpath(
            'references', 'references.bib').open(encoding='utf-8') as fp:
        for key, entry in pybtex.database.parse_string(
                fp.read(), bib_format='bibtex').entries.items():
            self.refs[key] = Source.from_entry(key, entry)
    # Combined set of citable keys: local bibliography plus (if available) concepticon's.
    all_refs = set(self.refs)
    if concepticon:
        all_refs = all_refs.union(concepticon.bibliography)
    # Datasets carrying their own metadata file inside this repository.
    for row in reader(self.repos / 'concept_set_meta.tsv', delimiter='\t', dicts=True):
        row['norare'] = self
        row['path'] = self.repos.joinpath('concept_set_meta', row['ID'],
                                          row['ID'] + '.tsv-metadata.json')
        self.datasets[row['ID']] = ConceptSetMeta(
            **{k.lower(): v for k, v in row.items()})
        # Normalize the comma-separated source-language field to a lowercase list.
        self.datasets[row['ID']].source_language = [
            lg.lower().strip()
            for lg in self.datasets[row['ID']].source_language.split(',')]
    # remaining datasets come from concepticon, we identify them from datasets
    # NOTE(review): if concepticon could not be instantiated above and such datasets
    # exist, the attribute access below would fail — presumably the data guarantees
    # this cannot happen; confirm.
    concepticon_datasets = [d for d in datasets if d not in self.datasets]
    for dataset in concepticon_datasets:
        ds = concepticon.conceptlists[dataset]
        self.datasets[ds.id] = ConceptSetMeta(
            id=ds.id,
            author=ds.author,
            year=ds.year,
            tags=', '.join(ds.tags),
            source_language=ds.source_language,
            target_language=ds.target_language,
            url=ds.url,
            refs=ds.refs,
            note=ds.note,
            alias=ds.alias,
            norare=self,
            path=concepticon.repos.joinpath('concepticondata', 'conceptlists',
                                            ds.id + '.tsv-metadata.json'))
    # Validate that every reference cited by a dataset resolves in the combined
    # bibliography; a string-valued refs field is treated as a single key.
    for dataset in self.datasets.values():
        if dataset.refs:
            refs = [dataset.refs] if isinstance(dataset.refs, str) else dataset.refs
            for ref in refs:
                if ref not in all_refs:  # pragma: no cover
                    raise ValueError(
                        'missing references.bib: {}'.format(ref))