Ejemplo n.º 1
0
def test_import_module(tmppath):
    """import_module loads package dirs and single files without altering sys.path."""
    from clldutils.path import import_module

    make_file(tmppath, name='__init__.py', text='A = [1, 2, 3]')
    saved_path = list(sys.path)
    module = import_module(tmppath)
    assert len(module.A) == 3
    # importing must not leave a permanent modification of sys.path behind
    assert sys.path == saved_path

    make_file(tmppath, name='abcd.py', text='A = [1, 2, 3]')
    module = import_module(tmppath / 'abcd.py')
    assert len(module.A) == 3
Ejemplo n.º 2
0
    def test_import_module(self):
        """import_module works both for a package directory and a single .py file."""
        from clldutils.path import import_module

        pkg_init = self.tmp_path('__init__.py')
        with pkg_init.open('w', encoding='ascii') as fh:
            fh.write('A = [1, 2, 3]')

        # importing the directory picks up __init__.py
        module = import_module(self.tmp_path())
        self.assertEqual(len(module.A), 3)

        single_file = self.tmp_path('mod.py')
        with single_file.open('w', encoding='ascii') as fh:
            fh.write('A = [1, 2, 3]')

        # importing a single module file works as well
        module = import_module(self.tmp_path('mod.py'))
        self.assertEqual(len(module.A), 3)
Ejemplo n.º 3
0
def dataset(repos, tmpd, glottolog, concepticon):
    """Fixture: build and install the test dataset from the repos checkout."""
    from mock import Mock

    module = import_module(repos / 'datasets' / 'test_dataset')
    instance = module.Test(glottolog=glottolog, concepticon=concepticon)
    # install with a mocked logger so the fixture stays silent
    instance._install(log=Mock())
    return instance
Ejemplo n.º 4
0
def test_tokenizer(repos, string, tokens):
    """The dataset tokenizer splits `string` into the whitespace-separated `tokens`."""
    import warnings

    with warnings.catch_warnings():
        # dataset import may emit warnings irrelevant to this test
        warnings.simplefilter("ignore")
        module = import_module(repos / 'datasets' / 'test_dataset_cldf')
        ds = module.Test()
        expected = tokens.split()
        assert ds.tokenizer(None, string) == expected
Ejemplo n.º 5
0
    def __init__(self, path, glottolog_repos=None):
        """
        Initialize a dataset from its directory path.

        :param path: directory holding the dataset (its name becomes the id).
        :param glottolog_repos: unused here; presumably consumed by subclasses \
        or later methods — TODO confirm.
        """
        def _ensure_dir(p):
            # create the directory only when missing (mkdir is not recursive)
            if not p.exists():
                p.mkdir()

        directory = Path(path)
        self.id = directory.name
        self.log = logging.getLogger(pygelato.__name__)
        self.dir = directory

        # raw data lives under <dir>/raw/data
        self.raw = self.dir.joinpath('raw', 'data')
        _ensure_dir(self.raw)

        # processed data lives under <dir>/processed
        self.processed = self.dir.joinpath('processed')
        _ensure_dir(self.processed)

        # dataset-specific commands are importable from the dataset directory
        self.commands = import_module(self.dir)
        self.md = jsonlib.load(self.dir.joinpath('metadata.json'))
Ejemplo n.º 6
0
    def __init__(self, path):
        """
        Initialize a dataset from its directory path.

        :param path: directory holding the dataset's files (``metadata.json``, \
        optional ``languages.csv`` and ``concepts.csv``).
        :raises ValueError: if a row in ``languages.csv`` has a non-empty \
        GLOTTOCODE that does not match ``GC_PATTERN``.
        """
        path = Path(path)
        self.id = path.name
        self.log = logging.getLogger(pylexibank.__name__)
        self.dir = path
        # raw input data lives under <dir>/raw/data, created on demand
        # NOTE(review): mkdir() is not recursive — assumes <dir>/raw exists; confirm
        self.raw = self.dir.joinpath('raw', 'data')
        if not self.raw.exists():
            self.raw.mkdir()
        # CLDF output directory, created on demand
        self.cldf_dir = self.dir.joinpath('cldf')
        if not self.cldf_dir.exists():
            self.cldf_dir.mkdir()
        # dataset-specific code is importable from the dataset directory itself
        self.commands = import_module(self.dir)
        self.md = jsonlib.load(self.dir.joinpath('metadata.json'))
        # languages.csv is optional; every non-empty GLOTTOCODE must validate
        self.languages = []
        lpath = self.dir.joinpath('languages.csv')
        if lpath.exists():
            for item in reader(lpath, dicts=True):
                if item['GLOTTOCODE'] and not GC_PATTERN.match(
                        item['GLOTTOCODE']):
                    raise ValueError("Wrong glottocode for item {0}".format(
                        item['GLOTTOCODE']))
                self.languages.append(item)
        # a Concepticon conceptlist id may be declared via dc:conformsTo in metadata
        self.conceptlist = None
        url = self.md.get('dc:conformsTo')
        if url and url.startswith(
                'http://concepticon.clld.org/contributions/'):
            self.conceptlist = url.split('/')[-1]
        # concepts.csv is optional; defaults to an empty list
        self.concepts = []
        cpath = self.dir.joinpath('concepts.csv')
        if cpath.exists():
            self.concepts = list(reader(cpath, dicts=True))
        self.cognates = Cognates()

        # the following attributes are only set when a dataset's cldf method is run:
        self.glottolog_languoids = {}
        self.glottolog_version, self.concepticon_version = None, None
Ejemplo n.º 7
0
    def __init__(self,
                 name,
                 data_path=data_path,
                 base='cddb',
                 _languages=_languages,
                 _path=cddb_path):
        """
        Initialize a dataset by name.

        :param name: dataset identifier; used as the id and to locate files \
        via ``data_path``.
        :param data_path: factory returning a path-getter for the dataset \
        (default bound at module level — TODO confirm semantics).
        :param base: key column used when reading ``languages.csv``.
        :param _languages: module-level language table merged into the \
        per-dataset language records.
        :param _path: locator for the ``datasets`` code directory.
        """
        self.id = name
        self.get_path = data_path(name)
        self.path = self.get_path('')
        self._data = {}

        # get the languages
        self.languages = csv2dict(self.get_path('languages.csv'),
                                  key=base,
                                  prefix=self.id)
        # map '<id>_id' values back to the language keys for reverse lookup
        self.lid2lang = dict([(self.languages[x][self.id + '_id'], x)
                              for x in self.languages])
        # enrich each language record with lower-cased fields from the
        # module-level language table
        for k in self.languages:
            for h, v in _languages[k].items():
                self.languages[k][h.lower()] = v

        self.metadata = json2dict(self.get_path('metadata.json'))
        #with with_sys_path(Path(_path('datasets'))) as f:
        # dataset-specific commands are importable from the datasets directory
        self.commands = import_module(Path(_path('datasets', self.id)))

        # try to get a concept list: prefer a local concepts.csv, fall back to
        # the Concepticon list named in metadata
        clist = False
        if os.path.isfile(self.get_path('concepts.csv')):
            clist = Conceptlist.from_file(self.get_path('concepts.csv'))
        elif 'concepts' in self.metadata:
            clist = Concepticon().conceptlists[self.metadata['concepts']]
        # NOTE(review): when no concept list is found, self.concepts is never
        # set — attribute access may fail later; confirm callers handle this
        if clist:
            self.concepts = {
                c.gloss or c.english: c
                for c in clist.concepts.values()
            }

        # NOTE(review): self.transform is likewise only set when the metadata
        # declares a 'profile' — confirm downstream code guards for this
        if 'profile' in self.metadata:
            self.transform = get_transformer(self.metadata['profile'])
Ejemplo n.º 8
0
def dataset_cldf(repos, tmpd, glottolog, concepticon):
    """Fixture: instantiate (but do not install) the CLDF test dataset."""
    module = import_module(repos / 'datasets' / 'test_dataset_cldf')
    return module.Test(glottolog=glottolog, concepticon=concepticon)
Ejemplo n.º 9
0
def import_module(dotted_name_or_path):
    """Import a module given either a filesystem path or a dotted module name.

    If the argument names an existing path, it is imported from the
    filesystem; otherwise it is treated as a dotted module name.
    """
    candidate = pathlib.Path(dotted_name_or_path)
    if not candidate.exists():
        # not a path on disk - fall back to a regular dotted-name import
        return importlib.import_module(dotted_name_or_path)  # pragma: no cover
    return path.import_module(candidate.resolve())