def test_import_module(tmppath):
    """Package and single-module imports work and leave sys.path untouched."""
    from clldutils.path import import_module

    make_file(tmppath, name='__init__.py', text='A = [1, 2, 3]')
    saved_sys_path = list(sys.path)
    pkg = import_module(tmppath)
    assert len(pkg.A) == 3
    # Importing must not permanently modify the module search path.
    assert saved_sys_path == sys.path

    make_file(tmppath, name='abcd.py', text='A = [1, 2, 3]')
    mod = import_module(tmppath / 'abcd.py')
    assert len(mod.A) == 3
def test_import_module(self):
    """Both a package directory and a single .py file can be imported."""
    from clldutils.path import import_module

    with self.tmp_path('__init__.py').open('w', encoding='ascii') as fp:
        fp.write('A = [1, 2, 3]')
    pkg = import_module(self.tmp_path())
    self.assertEqual(len(pkg.A), 3)

    with self.tmp_path('mod.py').open('w', encoding='ascii') as fp:
        fp.write('A = [1, 2, 3]')
    mod = import_module(self.tmp_path('mod.py'))
    self.assertEqual(len(mod.A), 3)
def dataset(repos, tmpd, glottolog, concepticon):
    """Fixture: install the test dataset and return the Dataset instance.

    A ``Mock`` is passed as the logger so installation output is swallowed.
    """
    # Use the stdlib unittest.mock rather than the third-party `mock`
    # backport — they are API-compatible and this drops a dependency.
    from unittest.mock import Mock

    mod = import_module(repos / 'datasets' / 'test_dataset')
    ds = mod.Test(glottolog=glottolog, concepticon=concepticon)
    ds._install(log=Mock())
    return ds
def test_tokenizer(repos, string, tokens):
    """The dataset's tokenizer splits `string` into the expected tokens."""
    import warnings

    with warnings.catch_warnings():
        # Silence any warnings raised while importing/running the dataset.
        warnings.simplefilter("ignore")
        ds_module = import_module(repos / 'datasets' / 'test_dataset_cldf')
        ds = ds_module.Test()
        assert ds.tokenizer(None, string) == tokens.split()
def __init__(self, path, glottolog_repos=None):
    """
    A dataset is initialized by passing its directory path.

    :param path: Directory of the dataset; its name becomes the dataset ID.
    :param glottolog_repos: Unused here; kept for interface compatibility \
(NOTE(review): not referenced in this method — confirm callers rely on it).
    """
    path = Path(path)
    self.id = path.name
    self.log = logging.getLogger(pygelato.__name__)
    self.dir = path

    # raw data: `raw/data` is a nested path, so intermediate directories
    # must be created too — a bare mkdir() raises FileNotFoundError when
    # `raw` itself does not exist yet.
    self.raw = self.dir.joinpath('raw', 'data')
    if not self.raw.exists():
        self.raw.mkdir(parents=True)

    # processed data
    self.processed = self.dir.joinpath('processed')
    if not self.processed.exists():
        self.processed.mkdir()

    # Dataset-specific commands live in a module inside the dataset dir.
    self.commands = import_module(self.dir)
    self.md = jsonlib.load(self.dir.joinpath('metadata.json'))
def __init__(self, path):
    """
    A dataset is initialized by passing its directory path.
    """
    path = Path(path)
    self.id = path.name
    self.log = logging.getLogger(pylexibank.__name__)
    self.dir = path

    # `raw/data` is a nested path: create intermediate directories too —
    # a bare mkdir() raises FileNotFoundError when `raw` does not exist yet.
    self.raw = self.dir.joinpath('raw', 'data')
    if not self.raw.exists():
        self.raw.mkdir(parents=True)

    self.cldf_dir = self.dir.joinpath('cldf')
    if not self.cldf_dir.exists():
        self.cldf_dir.mkdir()

    self.commands = import_module(self.dir)
    self.md = jsonlib.load(self.dir.joinpath('metadata.json'))

    # Optional per-language metadata; glottocodes are validated on read.
    self.languages = []
    lpath = self.dir.joinpath('languages.csv')
    if lpath.exists():
        for item in reader(lpath, dicts=True):
            if item['GLOTTOCODE'] and not GC_PATTERN.match(item['GLOTTOCODE']):
                raise ValueError("Wrong glottocode for item {0}".format(
                    item['GLOTTOCODE']))
            self.languages.append(item)

    # If the dataset conforms to a Concepticon conceptlist, remember its ID.
    self.conceptlist = None
    url = self.md.get('dc:conformsTo')
    if url and url.startswith('http://concepticon.clld.org/contributions/'):
        self.conceptlist = url.split('/')[-1]

    self.concepts = []
    cpath = self.dir.joinpath('concepts.csv')
    if cpath.exists():
        self.concepts = list(reader(cpath, dicts=True))

    self.cognates = Cognates()

    # the following attributes are only set when a dataset's cldf method is run:
    self.glottolog_languoids = {}
    self.glottolog_version, self.concepticon_version = None, None
def __init__(self, name, data_path=data_path, base='cddb',
             _languages=_languages, _path=cddb_path):
    """Load a dataset by name: languages, metadata, commands and concepts.

    :param name: Dataset identifier; doubles as its directory name.
    :param data_path: Factory returning a path-builder for the dataset.
    :param base: Key column used when reading languages.csv.
    :param _languages: Global language metadata merged into each language row.
    :param _path: Path builder for the shared cddb repository.
    """
    self.id = name
    self.get_path = data_path(name)
    self.path = self.get_path('')
    self._data = {}

    # Languages: csv rows keyed on `base`, plus a reverse id -> key lookup.
    self.languages = csv2dict(self.get_path('languages.csv'), key=base,
                              prefix=self.id)
    self.lid2lang = {
        self.languages[key][self.id + '_id']: key for key in self.languages}
    # Merge global language metadata (lower-cased field names) into each row.
    for k in self.languages:
        for h, v in _languages[k].items():
            self.languages[k][h.lower()] = v

    self.metadata = json2dict(self.get_path('metadata.json'))
    self.commands = import_module(Path(_path('datasets', self.id)))

    # Try to get a concept list: a local concepts.csv wins, otherwise fall
    # back to the conceptlist named in the metadata.
    clist = None
    if os.path.isfile(self.get_path('concepts.csv')):
        clist = Conceptlist.from_file(self.get_path('concepts.csv'))
    elif 'concepts' in self.metadata:
        clist = Concepticon().conceptlists[self.metadata['concepts']]
    if clist:
        self.concepts = {
            c.gloss or c.english: c for c in clist.concepts.values()}

    if 'profile' in self.metadata:
        self.transform = get_transformer(self.metadata['profile'])
def dataset_cldf(repos, tmpd, glottolog, concepticon):
    """Fixture: the CLDF test dataset, instantiated but not installed."""
    ds_module = import_module(repos / 'datasets' / 'test_dataset_cldf')
    return ds_module.Test(glottolog=glottolog, concepticon=concepticon)
def import_module(dotted_name_or_path):
    """Import a module given either a filesystem path or a dotted name.

    If the argument names an existing file or directory it is imported
    from that location; otherwise it is treated as a dotted module name.
    """
    candidate = pathlib.Path(dotted_name_or_path)
    if not candidate.exists():
        return importlib.import_module(dotted_name_or_path)  # pragma: no cover
    return path.import_module(candidate.resolve())